diff --git a/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml b/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml
new file mode 100644
--- /dev/null
+++ b/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml
@@ -0,0 +1,10 @@
+# Settings for the qwen25_7b-vl-32k-bf16 benchmark (name taken from this file's path).
+# NOTE(review): presumably consumed as model-serving options - confirm with the runner.
+max_model_len: 32768
+max_num_seqs: 128
+gpu_memory_utilization: 0.85
+tensor_parallel_size: 1
+# JSON object deliberately kept as a quoted string rather than a YAML mapping;
+# presumably the consumer parses it as JSON - verify before changing its form.
+limit_mm_per_prompt: '{"image": 100, "video": 100}'
+enable_mm: true
diff --git a/benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml b/benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml
new file mode 100644
--- /dev/null
+++ b/benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml
@@ -0,0 +1,12 @@
+# Settings for the qwen25-vl-32k benchmark, stored under request_yaml/.
+# NOTE(review): presumably per-request generation options - confirm with the runner.
+top_p: 0.8
+temperature: 0.7
+# min_tokens is nested under metadata, unlike max_tokens, which is top-level.
+metadata:
+  min_tokens: 1
+# NOTE(review): same value as max_model_len in qwen25_7b-vl-32k-bf16.yaml;
+# confirm the consumer leaves room for the prompt tokens.
+max_tokens: 32768
+repetition_penalty: 1.05
+frequency_penalty: 0
+presence_penalty: 0