From 65074ca162c0587f3910f15b070962bf685853f0 Mon Sep 17 00:00:00 2001 From: tianlef <1095012807@qq.com> Date: Tue, 21 Oct 2025 16:34:13 +0800 Subject: [PATCH] [Doc]add deepseek wint4 ce --- benchmarks/yaml/deepseek-32k-tp8-wint4.yaml | 9 +++++++++ benchmarks/yaml/request_yaml/deepseek-32k.yaml | 10 ++++++++++ 2 files changed, 19 insertions(+) create mode 100644 benchmarks/yaml/deepseek-32k-tp8-wint4.yaml create mode 100644 benchmarks/yaml/request_yaml/deepseek-32k.yaml diff --git a/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml new file mode 100644 index 00000000000..421c8e34df8 --- /dev/null +++ b/benchmarks/yaml/deepseek-32k-tp8-wint4.yaml @@ -0,0 +1,9 @@ +quantization: wint4 +load_choices: "default_v1" +graph_optimization_config: + use_cudagraph: True + use_unique_memory_pool: True +no_enable_prefix_caching: True +max_num_seqs: 256 +max_model_len: 32768 +tensor_parallel_size: 8 diff --git a/benchmarks/yaml/request_yaml/deepseek-32k.yaml b/benchmarks/yaml/request_yaml/deepseek-32k.yaml new file mode 100644 index 00000000000..12d1198a6f9 --- /dev/null +++ b/benchmarks/yaml/request_yaml/deepseek-32k.yaml @@ -0,0 +1,10 @@ +temperature: 0.8 +top_p: 0.8 +presence_penalty: 0 +repetition_penalty: 1.0 +frequency_penalty: 0 +max_tokens: 12288 +metadata: + min_tokens: 1 +chat_template_kwargs: + enable_thinking: false