From 662f2b0698b8241d285e9d653e7d2bccc851b6ad Mon Sep 17 00:00:00 2001 From: ophilia-lee Date: Mon, 27 Oct 2025 14:55:07 +0800 Subject: [PATCH 1/3] =?UTF-8?q?benchmark=E5=B7=A5=E5=85=B7=E9=80=82?= =?UTF-8?q?=E9=85=8DSGLang=E6=A1=86=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{vLLM_default.yaml => request.yaml} | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) rename benchmarks/yaml/request_yaml/{vLLM_default.yaml => request.yaml} (53%) diff --git a/benchmarks/yaml/request_yaml/vLLM_default.yaml b/benchmarks/yaml/request_yaml/request.yaml similarity index 53% rename from benchmarks/yaml/request_yaml/vLLM_default.yaml rename to benchmarks/yaml/request_yaml/request.yaml index a6385823b5f..9fc603354b6 100644 --- a/benchmarks/yaml/request_yaml/vLLM_default.yaml +++ b/benchmarks/yaml/request_yaml/request.yaml @@ -1,11 +1,11 @@ -top_p: 1.0 -temperature: 1.0 -metadata: - min_tokens: 1 -max_tokens: 30721 +top_p: 0.8 +temperature: 0.8 +max_tokens: 12288 repetition_penalty: 1.0 frequency_penalty: 0 presence_penalty: 0 -skip_special_tokens: false +metadata: + enable_thinking: false + min_tokens: 1 chat_template_kwargs: - enable_thinking: true + enable_thinking: false From 5ac266e28873aecc2ee9b6f02d278eb59888a451 Mon Sep 17 00:00:00 2001 From: ophilia-lee Date: Mon, 27 Oct 2025 14:58:02 +0800 Subject: [PATCH 2/3] =?UTF-8?q?benchmark=E5=B7=A5=E5=85=B7=E9=80=82?= =?UTF-8?q?=E9=85=8DSGLang=E6=A1=86=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/backend_request_func.py | 14 +++++++++----- benchmarks/benchmark_serving.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 837d9df91e9..545e9870e72 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -128,14 +128,14 @@ async def async_request_eb_openai_chat_completions( chunk = chunk_bytes.decode("utf-8").removeprefix("data: ") if chunk != "[DONE]": - # print("####chunk:", chunk, type(chunk)) + #print("####chunk:", chunk, type(chunk)) timestamp = time.perf_counter() data = json.loads(chunk) if request_id == "None" and "id" in data: request_id = data["id"] - if choices := data.get("choices"): + if (choices := data.get("choices")) and choices[0]["delta"].get("content"): content = choices[0]["delta"].get("content") reason_content = choices[0]["delta"].get("reasoning_content") # First token @@ -143,9 +143,12 @@ async def async_request_eb_openai_chat_completions( ttft = timestamp - st output.ttft = ttft # cached_tokens - output.prompt_len = ( - data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0) - ) + if data["usage"] and data["usage"].get("prompt_tokens_details", {}): + output.prompt_len = ( + data["usage"].get("prompt_tokens_details", {}).get("cached_tokens", 0) + ) + else: + output.prompt_len = 0 # Decoding phase else: @@ -157,6 +160,7 @@ async def async_request_eb_openai_chat_completions( elif usage := data.get("usage", {}): output.output_tokens = usage.get("completion_tokens", 0) output.prompt_tokens = usage.get("prompt_tokens", 0) + most_recent_timestamp = timestamp diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index ce072555fa7..fb13301c4e4 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -635,7 +635,7 @@ def benchmark_metrics( goodput_config_dict = check_goodput_args(args) metrics, actual_output_lens = calculate_metrics( - input_requests=input_requests, + # input_requests=input_requests, outputs=outputs, dur_s=benchmark_duration, selected_percentiles=selected_percentiles, From 14fd833b251472ba3a76e20663e6adc30b90c3b1 Mon Sep 17 00:00:00 2001 From: ophilia-lee Date: Mon, 27 Oct 2025 15:09:44 +0800 Subject: [PATCH 3/3] =?UTF-8?q?benchmark=E5=B7=A5=E5=85=B7=E9=80=82?= =?UTF-8?q?=E9=85=8DSGLang=E6=A1=86=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/backend_request_func.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 545e9870e72..596804331f2 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -134,8 +134,8 @@ async def async_request_eb_openai_chat_completions( if request_id == "None" and "id" in data: request_id = data["id"] - - if (choices := data.get("choices")) and choices[0]["delta"].get("content"): + + if choices := data.get("choices"): content = choices[0]["delta"].get("content") reason_content = choices[0]["delta"].get("reasoning_content") # First token