diff --git a/benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml b/benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml
new file mode 100644
index 00000000000..6ec412b1871
--- /dev/null
+++ b/benchmarks/yaml/eb45-128k-wint4-tp1-plas.yaml
@@ -0,0 +1,6 @@
+tensor_parallel_size: 1
+max_model_len: 131072
+max_num_seqs: 32
+quantization: wint4
+max_num_batched_tokens: 8192
+plas_attention_config: '{"plas_encoder_top_k_left": 50, "plas_encoder_top_k_right": 60, "plas_decoder_top_k_left": 100, "plas_decoder_top_k_right": 120}'