In [None]:
import concurrent.futures
import json
import os
import random
from typing import List, Tuple

import pandas as pd
from openai import OpenAI
# API credentials are read from the environment so that real keys never have to
# be written into (and committed with) the notebook. The original "xxx"
# placeholder is kept as the fallback when the variable is not set.
client_deepseek = OpenAI(
    api_key=os.environ.get("DEEPSEEK_API_KEY", "xxx"),  # deepseek api key
    base_url="https://api.deepseek.com",
)
client_qwen = OpenAI(
    api_key=os.environ.get("DASHSCOPE_API_KEY", "xxx"),  # qwen api key
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

# Helper functions

In [None]:
def prompt_reasoningmodel(instruction, user_prompt, client, model_name, verbose: bool = True) -> Tuple[str, str]:
    """Send one prompt to a reasoning model and return its reasoning and answer.

    Args:
        instruction: Optional system prompt. When None, no system message is sent.
        user_prompt: Text of the user message.
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        model_name: Model identifier. ``"qwen3-8b"`` is requested in streaming
            mode and the reply is reassembled from the chunks; every other
            model is requested with a single non-streaming call.
        verbose: When True (the default, matching the original behavior), the
            final answer text is printed.

    Returns:
        A ``(reasoning_content, content)`` tuple of strings. Either element is
        an empty string when the model did not provide it.

    Raises:
        Whatever ``client.chat.completions.create`` raises is propagated.
    """
    messages = []
    if instruction is not None:
        messages.append({"role": "system", "content": instruction})
    messages.append({"role": "user", "content": user_prompt})

    if model_name == "qwen3-8b":
        # NOTE(review): presumably this model must be called with stream=True
        # on the endpoint in use -- confirm against the provider documentation.
        completion = client.chat.completions.create(
            model="qwen3-8b",
            messages=messages,
            stream=True,
            stream_options={"include_usage": True}
        )
        reasoning_parts = []
        content_parts = []
        for chunk in completion:
            # Chunks without choices carry no text and are skipped.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            # getattr guards against deltas that omit a field entirely; the two
            # fields are handled independently so a chunk carrying both does
            # not lose its reasoning text.
            piece = getattr(delta, "content", None)
            if piece:
                content_parts.append(piece)
            reasoning_piece = getattr(delta, "reasoning_content", None)
            if reasoning_piece:
                reasoning_parts.append(reasoning_piece)
        reasoning_content = "".join(reasoning_parts)
        content = "".join(content_parts)
    else:
        response = client.chat.completions.create(
            model=model_name,
            messages=messages
        )
        message = response.choices[0].message
        # Models that return no reasoning may omit the attribute or set it to
        # None; normalize to "" so the declared return type always holds.
        reasoning_content = getattr(message, "reasoning_content", None) or ""
        content = getattr(message, "content", None) or ""

    if verbose:
        print(content)
    return reasoning_content, content

def batch_prompt(instruction_list, q_list, client, model_name, max_workers: int = 5) -> List[Tuple[str, str]]:
    """Run ``prompt_reasoningmodel`` for every question concurrently.

    Args:
        instruction_list: Per-question system prompts, indexed by position, or
            None to send no system prompt for any question.
        q_list: Questions to send; its length determines the result length.
        client: Client object forwarded unchanged to ``prompt_reasoningmodel``.
        model_name: Model identifier forwarded to ``prompt_reasoningmodel``.
        max_workers: Size of the thread pool.

    Returns:
        One ``(reasoning, answer)`` tuple per question, in the same order as
        ``q_list``. If a call raised, both elements of that tuple are the
        string ``"error: <exception text>"`` instead of the exception
        propagating.
    """
    total = len(q_list)
    if instruction_list is None:
        instruction_list = [None] * total

    # Pre-sized so each result lands in its question's slot no matter which
    # future completes first.
    results = [None] * total

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = {}
        for position, question in enumerate(q_list):
            job = pool.submit(
                prompt_reasoningmodel,
                instruction_list[position],
                question,
                client,
                model_name,
            )
            pending[job] = position

        for finished in concurrent.futures.as_completed(pending):
            position = pending[finished]
            try:
                outcome = finished.result()
            except Exception as exc:
                # Best-effort: record the failure in place of the result.
                failure = f"error: {str(exc)}"
                outcome = (failure, failure)
            results[position] = outcome

    return results

# RJUA

In [None]:
def _load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line is parsed as one JSON document. Blank lines are
    skipped so a stray empty line does not abort loading with a
    ``json.JSONDecodeError``.
    """
    records = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if line.strip():
                records.append(json.loads(line))  # load line by line
    return records


# Training split: one record per line, each with "question" and "answer" keys.
rjua_train = _load_jsonl("../dataset/rjua/RJUA_train.json")
rjua_train_questions = [item["question"] for item in rjua_train]
rjua_train_answers = [item["answer"] for item in rjua_train]

# Test split, loaded the same way.
rjua_test = _load_jsonl("../dataset/rjua/RJUA_test.json")
rjua_test_questions = [item["question"] for item in rjua_test]
rjua_test_answers = [item["answer"] for item in rjua_test]
rjua_test_sample = pd.DataFrame({
    "question": rjua_test_questions,
    "answer": rjua_test_answers
})

# Combined pool: train entries first, then test entries.
rjua_all_questions = rjua_train_questions + rjua_test_questions
rjua_all_answers = rjua_train_answers + rjua_test_answers

In [None]:
import pickle

# --- qwen3-8b over every RJUA question (train + test) ---
rjua_all_qwen_results = batch_prompt(
    instruction_list=None,
    q_list=rjua_all_questions,
    client=client_qwen,
    model_name="qwen3-8b",
    max_workers=5,
)
# Each result is a (reasoning, answer) pair; split into parallel lists.
rjua_all_qwen_COTs = [pair[0] for pair in rjua_all_qwen_results]
rjua_all_qwen_answers = [pair[1] for pair in rjua_all_qwen_results]
result_dict = {
    "rjua_all_qwen_COTs": rjua_all_qwen_COTs,
    "rjua_all_qwen_answers": rjua_all_qwen_answers,
}
with open("../dataset/rjua/rjua_qwen_results.pkl", "wb") as f:
    pickle.dump(result_dict, f)

# --- deepseek-r1-distill-qwen-7b over the same questions, via the same client ---
rjua_all_r17b_results = batch_prompt(
    instruction_list=None,
    q_list=rjua_all_questions,
    client=client_qwen,
    model_name="deepseek-r1-distill-qwen-7b",
    max_workers=5,
)
rjua_all_r17b_COTs = [pair[0] for pair in rjua_all_r17b_results]
rjua_all_r17b_answers = [pair[1] for pair in rjua_all_r17b_results]
result_dict = {
    "rjua_all_r17b_COTs": rjua_all_r17b_COTs,
    "rjua_all_r17b_answers": rjua_all_r17b_answers,
}
with open("../dataset/rjua/rjua_r17b_results.pkl", "wb") as f:
    pickle.dump(result_dict, f)

# GenMedGPT

In [None]:
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
with open("../dataset/genmedgpt/GenMedGPT_data.pkl", "rb") as f:
    genmedgpt_data = pd.read_pickle(f)

# Pull the three training fields out of the loaded object by key.
(
    genmedgpt_data_train_instructions,
    genmedgpt_data_train_questions,
    genmedgpt_data_train_answers,
) = (
    genmedgpt_data["train_instructions"],
    genmedgpt_data["train_questions"],
    genmedgpt_data["train_answers"],
)

# Same three fields for the test split.
(
    genmedgpt_data_test_instructions,
    genmedgpt_data_test_questions,
    genmedgpt_data_test_answers,
) = (
    genmedgpt_data["test_instructions"],
    genmedgpt_data["test_questions"],
    genmedgpt_data["test_answers"],
)


In [None]:

import pickle

# NOTE(review): the variables below are spelled `gemmedgpt_*` while the saved
# dictionary keys use `genmedgpt_*`; the names are kept as-is so that any later
# cell referring to them keeps working.

# --- qwen3-8b on the training split ---
gemmedgpt_train_qwen_results = batch_prompt(
    instruction_list=genmedgpt_data_train_instructions,
    q_list=genmedgpt_data_train_questions,
    client=client_qwen,
    model_name="qwen3-8b",
    max_workers=5,
)
# Each result is a (reasoning, answer) pair; split into parallel lists.
gemmedgpt_train_qwen_COTs = [pair[0] for pair in gemmedgpt_train_qwen_results]
gemmedgpt_train_qwen_answers = [pair[1] for pair in gemmedgpt_train_qwen_results]

# --- qwen3-8b on the test split ---
gemmedgpt_test_qwen_results = batch_prompt(
    instruction_list=genmedgpt_data_test_instructions,
    q_list=genmedgpt_data_test_questions,
    client=client_qwen,
    model_name="qwen3-8b",
    max_workers=5,
)
gemmedgpt_test_qwen_COTs = [pair[0] for pair in gemmedgpt_test_qwen_results]
gemmedgpt_test_qwen_answers = [pair[1] for pair in gemmedgpt_test_qwen_results]

# Bundle all four lists and persist them in a single pickle file.
result_dict = {
    "genmedgpt_train_qwen_COTs": gemmedgpt_train_qwen_COTs,
    "genmedgpt_train_qwen_answers": gemmedgpt_train_qwen_answers,
    "genmedgpt_test_qwen_COTs": gemmedgpt_test_qwen_COTs,
    "genmedgpt_test_qwen_answers": gemmedgpt_test_qwen_answers
}
with open("../dataset/genmedgpt/GenMedGPT_qwen_results.pkl", "wb") as f:
    pickle.dump(result_dict, f)