diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py index 8a1a8869aa..8f67743951 100644 --- a/lmdeploy/cli/serve.py +++ b/lmdeploy/cli/serve.py @@ -110,7 +110,7 @@ def add_parser_api_server(): quant_policy = ArgumentHelper.quant_policy(pt_group) model_format = ArgumentHelper.model_format(pt_group) hf_overrides = ArgumentHelper.hf_overrides(pt_group) - enable_metrics = ArgumentHelper.enable_metrics(pt_group) + disable_metrics = ArgumentHelper.disable_metrics(pt_group) ArgumentHelper.dp(pt_group) ArgumentHelper.ep(pt_group) ArgumentHelper.enable_microbatch(pt_group) @@ -135,7 +135,7 @@ def add_parser_api_server(): tb_group._group_actions.append(quant_policy) tb_group._group_actions.append(model_format) tb_group._group_actions.append(hf_overrides) - tb_group._group_actions.append(enable_metrics) + tb_group._group_actions.append(disable_metrics) ArgumentHelper.rope_scaling_factor(tb_group) ArgumentHelper.num_tokens_per_iter(tb_group) ArgumentHelper.max_prefill_iters(tb_group) @@ -217,7 +217,7 @@ def api_server(args): max_prefill_token_num=args.max_prefill_token_num, enable_microbatch=args.enable_microbatch, enable_eplb=args.enable_eplb, - enable_metrics=args.enable_metrics, + enable_metrics=not args.disable_metrics, role=EngineRole[args.role], migration_backend=MigrationBackend[args.migration_backend], model_format=args.model_format, @@ -245,7 +245,7 @@ def api_server(args): num_tokens_per_iter=args.num_tokens_per_iter, max_prefill_iters=args.max_prefill_iters, communicator=args.communicator, - enable_metrics=args.enable_metrics, + enable_metrics=not args.disable_metrics, hf_overrides=args.hf_overrides) chat_template_config = get_chat_template(args.chat_template, args.model_path) diff --git a/lmdeploy/cli/utils.py b/lmdeploy/cli/utils.py index da94891b37..a53a3cdc86 100644 --- a/lmdeploy/cli/utils.py +++ b/lmdeploy/cli/utils.py @@ -594,9 +594,12 @@ def enable_eplb(parser): return parser.add_argument('--enable-eplb', action='store_true', help='enable eplb for specified model') @staticmethod - def enable_metrics(parser): - """Add argument enable_metrics to parser.""" - return parser.add_argument('--enable-metrics', action='store_true', default=False, help='enable metrics system') + def disable_metrics(parser): + """Add argument disable_metrics to parser.""" + return parser.add_argument('--disable-metrics', + action='store_true', + default=False, + help='disable metrics system') # For Disaggregation @staticmethod @@ -623,7 +626,7 @@ def disable_vision_encoder(parser): return parser.add_argument('--disable-vision-encoder', action='store_true', default=False, - help='enable metrics system') + help='disable multimodal encoder') @staticmethod def logprobs_mode(parser): diff --git a/lmdeploy/messages.py b/lmdeploy/messages.py index 90a38a7f6a..57725eb23f 100644 --- a/lmdeploy/messages.py +++ b/lmdeploy/messages.py @@ -259,7 +259,7 @@ class TurbomindEngineConfig: empty_init: bool = False communicator: str = 'nccl' hf_overrides: Optional[Dict[str, Any]] = None - enable_metrics: bool = False + enable_metrics: bool = True def __post_init__(self): """Check input validation.""" @@ -372,7 +372,7 @@ class PytorchEngineConfig: enable_mp_engine: bool = False mp_engine_backend: str = 'mp' model_format: str = None - enable_metrics: bool = False + enable_metrics: bool = True hf_overrides: Optional[Dict[str, Any]] = None disable_vision_encoder: bool = False logprobs_mode: str = None