In [None]:
# Deploy the model behind vLLM's OpenAI-compatible HTTP server.
# The port must match the client code in this notebook, which calls
# http://127.0.0.1:8000/v1/... (the original 8060 left the client unable to connect).
# NOTE: `vllm serve` runs in the foreground, so this cell keeps the kernel busy until the
# server is stopped; launch it from a separate terminal/notebook while the evaluation runs.
!vllm serve /home/jibing/test4vl/test4Medbench/model/Qwen3-8B \
  --port 8000 \
  --dtype bfloat16 \
  --tensor-parallel-size 1 \
  --cpu-offload-gb 0 \
  --gpu-memory-utilization 0.75 \
  --max-model-len 8126 \
  --enable-prefix-caching \
  --enable-reasoning \
  --reasoning-parser deepseek_r1 \
  --enable-auto-tool-choice \
  --tool-call-parser hermes \
  --trust-remote-code


# MedBench数据集自动评测流程
利用已部署的 vLLM 服务（OpenAI 兼容接口），对 MedBench 数据集中的 `_test.jsonl` 文件进行自动推理，并生成评测平台要求的提交文件。

In [None]:
import os
import json
import requests
from tqdm import tqdm

# Endpoint and path configuration
vllm_api_url = 'http://127.0.0.1:8000/v1/chat/completions'  # OpenAI-compatible chat endpoint
medbench_dir = './MedBench'  # replace with the path to your MedBench folder
output_dir = './demo4test_output'  # where prediction files are written

# Make sure the output root exists before any results are written
os.makedirs(output_dir, exist_ok=True)

def build_prompt(item):
    """Build the user prompt for a single benchmark sample.

    Args:
        item: Parsed JSONL record. Must contain ``'question'``. May contain
            ``'options'`` in one of these shapes:

            * list/tuple of option texts -> labelled ``A.``, ``B.``, ... by position;
            * dict mapping option label to option text -> each entry rendered
              as ``"<label>. <text>"`` in dict order;
            * a single pre-formatted string -> inserted verbatim.

    Returns:
        The prompt string. With options it asks for the option letter;
        without options (missing, ``None`` or empty) it asks for the bare answer.

    Raises:
        KeyError: If ``item`` has no ``'question'`` key.
    """
    question = item['question']
    options = item.get('options')

    # Open-ended question: no choices to list.
    if not options:
        return f"{question}\n请直接输出答案，不要输出其他内容。"

    if isinstance(options, dict):
        # Iterating a dict directly would yield only its keys ("A. A"), so
        # render label/text pairs explicitly.
        option_lines = [f"{label}. {text}" for label, text in options.items()]
    elif isinstance(options, str):
        # A string would otherwise be enumerated character by character.
        option_lines = [options]
    else:
        option_lines = [f"{chr(ord('A') + idx)}. {text}" for idx, text in enumerate(options)]

    options_text = '\n'.join(option_lines)
    return f"{question}\n{options_text}\n请直接给出正确答案的选项字母。"

def filter_thoughts(text):
    """Strip model reasoning wrapped in ``<think>`` tags and return the visible answer.

    Handles three layouts:

    * well-formed ``<think>...</think>`` blocks (possibly several, possibly multi-line);
    * an orphan ``</think>`` with no opening tag -- everything up to the last
      closing tag is treated as reasoning and dropped;
    * an unclosed ``<think>`` (e.g. output truncated mid-reasoning) -- everything
      from the opening tag to the end is dropped.

    Args:
        text: Raw model output; ``None`` or an empty string is accepted.

    Returns:
        The text outside the reasoning sections, with surrounding whitespace
        removed. Returns ``''`` when there is no visible answer.
    """
    import re

    if not text:
        return ''

    # DOTALL lets '.' span newlines; the lazy quantifier keeps separate blocks separate.
    visible = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    # Orphan closing tag: the opening tag was not part of the returned text.
    if '</think>' in visible:
        visible = visible.rsplit('</think>', 1)[1]

    # Unclosed opening tag: reasoning was cut off before the answer started.
    if '<think>' in visible:
        visible = visible.split('<think>', 1)[0]

    return visible.strip()

def query_vllm(prompt, model="你的模型名称", max_tokens=6000, timeout=60):
    """Send one user prompt to the vLLM OpenAI-compatible chat endpoint.

    Args:
        prompt: Text sent as the single ``user`` message.
        model: Model id to put in the request body.
        max_tokens: Generation budget for the completion.
        timeout: Seconds passed to ``requests.post`` as the request timeout.
            NOTE(review): 60 s may be short for a 6000-token completion -- confirm
            against observed latency.

    Returns:
        The assistant message content with ``<think>`` sections removed and
        whitespace trimmed; ``''`` when the response carries no content.

    Raises:
        requests.RequestException: On connection errors, timeouts or a non-2xx status.
        KeyError, IndexError: If the response JSON lacks ``choices[0].message``.
    """
    headers = {"Authorization": "Bearer EMPTY"}
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": 0.0
    }
    response = requests.post(vllm_api_url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()

    message = response.json()['choices'][0]['message']
    # `content` can be null (for instance when a server-side reasoning parser
    # moved all generated text into a separate reasoning field); treat that as
    # an empty answer instead of failing on None.strip().
    raw_answer = (message.get('content') or '').strip()
    return filter_thoughts(raw_answer)

# List the models served by the backend (optional; the model name may also be hard-coded)
def get_vllm_models(timeout=10):
    """Return the ids of the models exposed by the vLLM server.

    The ``/models`` URL is derived from ``vllm_api_url`` so the host and port
    are configured in one place only.

    Args:
        timeout: Seconds passed to ``requests.get``; without a timeout the
            call could block indefinitely on an unresponsive server.

    Returns:
        A list of model id strings, or ``[]`` if the request or parsing fails
        (the error is printed, not raised).
    """
    # '.../v1/chat/completions' -> '.../v1/models'
    models_url = vllm_api_url.rsplit('/chat/completions', 1)[0] + '/models'
    try:
        resp = requests.get(models_url, timeout=timeout)
        resp.raise_for_status()
        models = resp.json().get("data", [])
        return [m["id"] for m in models]
    except Exception as e:
        # Best-effort lookup: report the problem and let the caller decide.
        print(f"获取模型列表失败: {e}")
        return []

# Collect every *_test.jsonl file under the MedBench tree as (directory, filename) pairs
test_files = []
for root, _subdirs, files in os.walk(medbench_dir):
    test_files.extend(
        (root, candidate) for candidate in files if candidate.endswith('_test.jsonl')
    )

# Pick the model to query: the first id reported by the server (or set it by hand)
models = get_vllm_models()
if models:
    model_name = models[0]
else:
    # Nothing to evaluate against -- stop before starting the long inference loop.
    raise RuntimeError("未检测到vLLM已加载模型，请检查后端。")

# Number of samples whose request failed and were written with an empty answer.
failed_count = 0

for root, fname in tqdm(test_files, desc="全部文件进度"):
    input_path = os.path.join(root, fname)
    # Mirror the input directory layout underneath output_dir.
    rel_dir = os.path.relpath(root, medbench_dir)
    output_subdir = os.path.join(output_dir, rel_dir)
    os.makedirs(output_subdir, exist_ok=True)
    output_path = os.path.join(output_subdir, fname)

    print(f"正在测试问题集：{input_path}")

    # Resume support: samples already present in the output file are skipped.
    # The question text is the identity key (an id field would be better if the data has one).
    finished_ids = set()
    if os.path.exists(output_path):
        with open(output_path, 'r', encoding='utf-8') as fdone:
            for line in fdone:
                try:
                    finished_ids.add(json.loads(line).get('question', ''))
                except (ValueError, AttributeError, TypeError):
                    # Corrupt/partial line or unexpected record shape: ignore it.
                    continue

    with open(input_path, 'r', encoding='utf-8') as fin:
        # Blank lines (e.g. a trailing empty line) are not records; json.loads would fail on them.
        lines = [line for line in fin if line.strip()]

    with open(output_path, 'a', encoding='utf-8') as fout:  # append so earlier results are kept
        for line in tqdm(lines, desc=f"推理 {fname}", leave=False):
            item = json.loads(line)
            if item.get('question', '') in finished_ids:
                continue
            prompt = build_prompt(item)
            try:
                answer = query_vllm(prompt, model=model_name)
            except Exception as e:
                # Keep going, but make the failure visible. The sample is still
                # written (with an empty answer) so the submission stays complete;
                # as a consequence it counts as finished on a resumed run -- delete
                # its line from the output file to have it retried.
                failed_count += 1
                tqdm.write(f"推理失败，已写入空答案（{input_path}）: {type(e).__name__}: {e}")
                answer = ""
            item['answer'] = answer
            fout.write(json.dumps(item, ensure_ascii=False) + '\n')
            # Persist each result immediately so an abrupt stop loses no finished sample.
            fout.flush()

print("所有_test.jsonl数据集推理完成，结果已保存到", output_dir)
if failed_count:
    print(f"注意：共有 {failed_count} 条样本请求失败，其答案为空。")

# 打包提交
将输出目录（`output_dir`）中的所有预测结果文件（保持与 MedBench 相同的目录结构）压缩为 MedBench.zip，并按平台要求提交。

In [None]:
import shutil

# Zip the contents of output_dir into MedBench.zip in the current working directory
shutil.make_archive(base_name='MedBench', format='zip', root_dir=output_dir)
print("已生成 MedBench.zip，可提交评测平台。")