From fbd71a7485bd96d6d154b582b815da49c3f6c239 Mon Sep 17 00:00:00 2001 From: tianlef <1095012807@qq.com> Date: Tue, 16 Sep 2025 15:52:09 +0800 Subject: [PATCH] [CE]add plas attention config --- benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml diff --git a/benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml b/benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml new file mode 100644 index 00000000000..6ec412b1871 --- /dev/null +++ b/benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml @@ -0,0 +1,6 @@ +tensor_parallel_size: 1 +max_model_len: 131072 +max_num_seqs: 32 +quantization: wint4 +max_num_batched_tokens: 8192 +plas_attention_config: '{"plas_encoder_top_k_left": 50, "plas_encoder_top_k_right": 60, "plas_decoder_top_k_left": 100, "plas_decoder_top_k_right": 120}'