-
Notifications
You must be signed in to change notification settings - Fork 285
feat(misc): Profiler support #1121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| from lightllm.server.multi_level_kv_cache.cpu_cache_client import CpuKvCacheClient | ||
| from lightllm.server.core.objs.shm_objs_io_buffer import ShmObjsIOBuffer | ||
| from lightllm.utils.log_utils import init_logger, log_time_ready | ||
| from lightllm.utils.profiler import ProcessProfiler, ProfilerCmd | ||
| from lightllm.server.router.token_load import TokenLoad | ||
| from lightllm.server.metrics.manager import MetricClient | ||
| from lightllm.common.basemodel.infer_lock import g_router_lock | ||
|
|
@@ -106,6 +107,10 @@ def __init__(self, args: StartArgs): | |
| if not self.args.enable_cpu_cache | ||
| else CpuKvCacheClient(only_create_meta_data=True, init_shm_data=False) | ||
| ) | ||
|
|
||
| self.profiler = ( | ||
| ProcessProfiler(mode=args.enable_profiling, name="lightllm-router") if args.enable_profiling else None | ||
| ) | ||
| return | ||
|
|
||
| async def wait_to_model_ready(self): | ||
|
|
@@ -508,16 +513,28 @@ def _multinode_tp_generate_new_batch(self): | |
| raise e | ||
| return | ||
|
|
||
| async def _profiler_cmd(self, cmd_obj: ProfilerCmd): | ||
| self.profiler.cmd(cmd_obj) | ||
|
|
||
| cmd = ProfilerCmd(cmd=cmd_obj.cmd) | ||
| while not self.shm_reqs_io_buffer.is_empty(): | ||
| await asyncio.sleep(0.02) | ||
|
Comment on lines
+520
to
+521
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This |
||
|
|
||
| self.shm_reqs_io_buffer.write_obj([cmd]) | ||
| self.shm_reqs_io_buffer.set_ready() | ||
|
|
||
| async def _recv_new_reqs_and_schedule(self): | ||
| if not hasattr(self, "recv_max_count"): | ||
| self.recv_max_count = 64 | ||
|
|
||
| try: | ||
| # 一次最多从 zmq 中取 recv_max_count 个请求,防止 zmq 队列中请求数量过多导致阻塞了主循环。 | ||
| for _ in range(self.recv_max_count): | ||
| recv_req: GroupReqIndexes = self.zmq_recv_socket.recv_pyobj(zmq.NOBLOCK) | ||
| recv_req: Union[GroupReqIndexes, ProfilerCmd] = self.zmq_recv_socket.recv_pyobj(zmq.NOBLOCK) | ||
| if isinstance(recv_req, GroupReqIndexes): | ||
| self._add_req(recv_req) | ||
| elif isinstance(recv_req, ProfilerCmd): | ||
| await self._profiler_cmd(recv_req) | ||
| else: | ||
| assert False, f"Error Req Inf {recv_req}" | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
| import time | ||
| import threading | ||
| import torch.distributed as dist | ||
| from typing import List, Tuple, Callable, Optional | ||
| from typing import Dict, List, Literal, Tuple, Callable, Optional | ||
| from transformers.configuration_utils import PretrainedConfig | ||
| from lightllm.utils.infer_utils import set_random_seed | ||
| from lightllm.utils.log_utils import init_logger | ||
|
|
@@ -39,6 +39,7 @@ | |
| from lightllm.common.basemodel.triton_kernel.gather_token_id import scatter_token | ||
| from lightllm.server.pd_io_struct import NIXLChunckedTransTaskRet | ||
| from .multi_level_kv_cache import MultiLevelKvCacheModule | ||
| from lightllm.utils.profiler import ProcessProfiler, ProfilerCmd | ||
|
|
||
|
|
||
| class ModeBackend: | ||
|
|
@@ -218,11 +219,19 @@ def init_model(self, kvargs): | |
| if self.args.mtp_mode: | ||
| self.init_mtp_draft_model(kvargs) | ||
|
|
||
| self.profiler: Optional[ProcessProfiler] = None | ||
| if self.args.enable_profiling: | ||
| self.profiler = ProcessProfiler( | ||
| mode=self.args.enable_profiling, | ||
| name=f"lightllm-model_backend-node{self.node_rank}_dev{get_current_device_id()}", | ||
| ) | ||
| self.profiling_active = False | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

The `profiling_active` flag is written by the request-reading path and read by the infer-loop threads without synchronization. Initialize a lock alongside it:

    self.profiling_active = False
    self.profiling_lock = threading.Lock()

Then hold the lock when accessing the flag. In `_try_read_new_reqs`:

    with self.profiling_lock:
        if self.profiler.is_active != self.profiling_active:
            if self.profiling_active:
                self.profiler.start()
            else:
                self.profiler.stop()

In `_read_reqs_buffer_and_init_reqs`:

    with self.profiling_lock:
        if obj.cmd == "start":
            self.profiling_active = True
        elif obj.cmd == "stop":
            self.profiling_active = False
||
|
|
||
| # 启动infer_loop_thread, 启动两个线程进行推理,对于具备双batch推理折叠得场景 | ||
| # 可以降低 cpu overhead,大幅提升gpu得使用率。 | ||
| self.infer_loop_thread = threading.Thread(target=self.infer_loop, daemon=True) | ||
| self.infer_loop_thread = threading.Thread(target=self.infer_loop, daemon=True, name="loop0") | ||
| self.infer_loop_thread.start() | ||
| self.infer_loop_thread1 = threading.Thread(target=self.infer_loop, daemon=True) | ||
| self.infer_loop_thread1 = threading.Thread(target=self.infer_loop, daemon=True, name="loop1") | ||
| self.infer_loop_thread1.start() | ||
| return | ||
|
|
||
|
|
@@ -308,6 +317,14 @@ def _try_read_new_reqs(self): | |
| self._try_read_new_reqs_multinode_tp() | ||
| else: | ||
| self._try_read_new_reqs_normal() | ||
|
|
||
| # on each loop thread | ||
| if self.profiler is not None: | ||
| if self.profiler.is_active != self.profiling_active: | ||
| if self.profiling_active: | ||
| self.profiler.start() | ||
| else: | ||
| self.profiler.stop() | ||
| return | ||
|
|
||
| def _try_read_new_reqs_normal(self): | ||
|
|
@@ -373,6 +390,11 @@ def _read_reqs_buffer_and_init_reqs(self): | |
| if obj.req_id in g_infer_context.requests_mapping: | ||
| req: InferReq = g_infer_context.requests_mapping[obj.req_id] | ||
| req.infer_aborted = True | ||
| elif isinstance(obj, ProfilerCmd): | ||
| if obj.cmd == "start": | ||
| self.profiling_active = True | ||
| elif obj.cmd == "stop": | ||
| self.profiling_active = False | ||
| else: | ||
| assert False, f"error type {type(obj)}" | ||
| if init_reqs: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,7 +7,7 @@ | |
| import pickle | ||
| import inspect | ||
| import setproctitle | ||
| from typing import List | ||
| from typing import List, Union | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| from lightllm.server.core.objs.io_objs.group_req import GroupReqIndexes | ||
| from lightllm.server.core.objs import ShmReqManager, StartArgs | ||
|
|
||
|
|
@@ -18,6 +18,7 @@ | |
| from lightllm.utils.graceful_utils import graceful_registry | ||
| from lightllm.utils.process_check import start_parent_check_thread | ||
| from lightllm.utils.envs_utils import get_unique_server_name | ||
| from lightllm.utils.profiler import ProcessProfiler, ProfilerCmd | ||
| from rpyc.utils.classic import obtain | ||
|
|
||
|
|
||
|
|
@@ -58,6 +59,9 @@ def __init__( | |
| self.args = args | ||
| self.visual_model_rpc_ports = visual_model_rpc_ports | ||
| self.shm_req_manager = ShmReqManager() | ||
| self.profiler: "ProcessProfiler|None" = ( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| ProcessProfiler(args.enable_profiling, name="lightllm-visual_server") if args.enable_profiling else None | ||
| ) | ||
|
|
||
| async def wait_to_model_ready(self): | ||
|
|
||
|
|
@@ -90,6 +94,7 @@ async def wait_to_model_ready(self): | |
| "quant_type": self.args.vit_quant_type, | ||
| "quant_cfg": self.args.vit_quant_cfg, | ||
| "max_batch_size": min(self.infer_batch_size // self.vit_dp, 1), | ||
| "profiler": self.args.enable_profiling, | ||
| } | ||
| init_model_ret.append(self.model_rpcs[dp_rank_id][tp_rank_id].init_model(kvargs)) | ||
| await asyncio.gather(*init_model_ret) | ||
|
|
@@ -171,9 +176,19 @@ async def loop_for_netio_req(self): | |
| while True: | ||
| try: | ||
| for _ in range(self.visual_recv_max_count): | ||
| recv_req: GroupReqIndexes = self.zmq_recv_socket.recv_pyobj(zmq.NOBLOCK) | ||
| recv_req: Union[GroupReqIndexes, ProfilerCmd] = self.zmq_recv_socket.recv_pyobj(zmq.NOBLOCK) | ||
| if isinstance(recv_req, GroupReqIndexes): | ||
| self.waiting_reqs.append(recv_req) | ||
| elif isinstance(recv_req, ProfilerCmd): | ||
| self.profiler.cmd(recv_req) | ||
| tasks = [] | ||
| for vit_dp_rank in range(self.vit_dp): | ||
| for vit_tp_rank in range(self.vit_tp): | ||
| task = asyncio.create_task( | ||
| self.model_rpcs[vit_dp_rank][vit_tp_rank].profiler_cmd(recv_req) | ||
| ) | ||
| tasks.append(task) | ||
| await asyncio.gather(*tasks) | ||
| else: | ||
| assert False, f"Error Req Inf {recv_req}" | ||
| self.visual_recv_max_count = min(self.visual_recv_max_count * 1.3, 256) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are a few grammatical errors and typos in the help string that could be corrected for clarity.