In [None]:
import requests
import json
from tqdm import tqdm
import os
# === 火山引擎 API 设置 ===
API_KEY = "2b064694-cb3b-49e3-bb6e-55ef9ad8351f"
API_URL = "https://ark.cn-beijing.volces.com/api/v3"  # 请替换为你所使用的大模型接口

HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}"
}

# === 加载检索结果（来自上一步的 JSON 文件）===
with open("../retrieval_result/QA_ds/embedding_0.7(20).json", "r", encoding="utf-8") as f:
    retrieval_results = json.load(f)["results"]

# === 构建 Prompt（模板）===
def build_prompt(query, documents, max_docs=3):
    context = ""
    for i, doc in enumerate(documents[:max_docs]):
        content = doc["content"].strip().replace("\n", " ")
        metadata = doc["metadata"]
        context += f"[文档{i+1} - 来源: {metadata.get('source_file')}, 页码: {metadata.get('page_num')}]:\n{content}\n\n"
    
    prompt = f"""你是一名保险知识问答助手，请基于以下参考资料回答用户的问题。

【参考资料】
{context}

【问题】
{query}

【回答】
"""
    return prompt

# === 请求火山引擎大模型 API ===
def call_model(prompt, temperature=0.3):
    payload = {
        "model": "deepseek-r1-distill-qwen-32b-250120",  # 替换为你实际使用的模型，比如 glm-4、baichuan2-13b-chat、qwen-max 等
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "temperature": temperature,
        "top_p": 0.9,
        "max_tokens": 1024
    }

    response = requests.post(API_URL, headers=HEADERS, data=json.dumps(payload))
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    else:
        print(f"API Error: {response.status_code}, {response.text}")
        return "生成失败"

# === 主流程：生成回答并保存 ===
output_results = []

for item in tqdm(retrieval_results, desc="生成中"):
    query = item["query"]
    correct_answer = item["correct_answer"]
    retrieved_docs = item["retrieved_documents"]

    prompt = build_prompt(query, retrieved_docs)
    generated_answer = call_model(prompt)

    output_results.append({
        "query": query,
        "reference_answer": correct_answer,
        "generated_answer": generated_answer,
        "source_docs": retrieved_docs
    })

# === 保存生成结果 ===
output_path = "./generation_result/QA_ds/generation_doubao.json"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(output_results, f, ensure_ascii=False, indent=2)

print(f"生成完成，保存至 {output_path}")


In [None]:
import requests
import json
from tqdm import tqdm
import os
from openai import OpenAI


# === CHAT_MODEL 类封装 ===
class CHAT_MODEL:
    def __init__(self, api_key, base_url, model_name):
        self.llm = OpenAI(
            api_key=api_key,
            base_url=base_url
        )
        self.model_name = model_name

    def chat(self, user_prompt):
        completion = self.llm.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "user", "content": user_prompt},
            ],
        )
        response = completion.choices[0].message.content
        return response


# === 构建 Prompt（模板）===
def build_prompt(query, documents, max_docs=3):
    context = ""
    for i, doc in enumerate(documents[:max_docs]):
        content = doc["content"].strip().replace("\n", " ")
        metadata = doc.get("metadata", {})
        context += f"[文档{i+1} - 来源: {metadata.get('source_file', '未知')}, 页码: {metadata.get('page_num', '未知')}]:\n{content}\n\n"

    prompt = f"""你是一名保险知识问答助手，请基于以下参考资料回答用户的问题。

【参考资料】
{context}

【问题】
{query}

【回答】
"""
    return prompt


def main():
    # === 配置 ===
    api_key = "2b064694-cb3b-49e3-bb6e-55ef9ad8351f"  # 你的 API Key
    base_url = "https://ark.cn-beijing.volces.com/api/v3"  # 大模型接口地址，示例火山引擎
    model_name = "deepseek-r1-distill-qwen-32b-250120"  # 你要调用的模型名

    chat_model = CHAT_MODEL(api_key=api_key, base_url=base_url, model_name=model_name)

    # === 读取检索结果 ===
    retrieval_json_path = "../retrieval_result/QA_ds/embedding_0.7(20).json"
    with open(retrieval_json_path, "r", encoding="utf-8") as f:
        retrieval_results = json.load(f)["results"]

    output_results = []

    # === 遍历检索结果，构建 prompt 并调用模型生成回答 ===
    for item in tqdm(retrieval_results, desc="生成中"):
        query = item.get("query", "")
        correct_answer = item.get("correct_answer", "")
        retrieved_docs = item.get("retrieved_documents", [])

        prompt = build_prompt(query, retrieved_docs)
        generated_answer = chat_model.chat(prompt)

        output_results.append({
            "query": query,
            "reference_answer": correct_answer,
            "generated_answer": generated_answer,
            "source_docs": retrieved_docs
        })

    # === 保存生成结果 ===
    output_path = "./generation_result/QA_ds/generation_doubao.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_results, f, ensure_ascii=False, indent=2)

    print(f"生成完成，保存至 {output_path}")


if __name__ == "__main__":
    main()


生成中: 100%|██████████| 222/222 [17:41<00:00,  4.78s/it]

生成完成，保存至 ./generation_result/QA_ds/generation_doubao.json





In [1]:
import requests
import jieba 
import json
from tqdm import tqdm
import os
from openai import OpenAI
import nltk
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
nltk.download('punkt')


class CHAT_MODEL:
    def __init__(self, api_key, base_url, model_name):
        self.llm = OpenAI(
            api_key=api_key,
            base_url=base_url
        )
        self.model_name = model_name

    def chat(self, user_prompt):
        completion = self.llm.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "user", "content": user_prompt},
            ],
        )
        response = completion.choices[0].message.content
        return response


def build_prompt(query, documents, max_docs=3):
    context = ""
    for i, doc in enumerate(documents[:max_docs]):
        content = doc["content"].strip().replace("\n", " ")
        metadata = doc.get("metadata", {})
        context += f"[文档{i + 1} - 来源: {metadata.get('source_file', '未知')}, 页码: {metadata.get('page_num', '未知')}]:\n{content}\n\n"

    prompt = f"""你是一名保险知识问答助手，请基于以下参考资料回答用户的问题。

【参考资料】
{context}

【问题】
{query}

【回答】
"""
    return prompt


def compute_metrics(generated, reference, retrieved_docs):
    # BLEU 计算
    # reference_tokens = [nltk.word_tokenize(reference)] if reference else [[]]
    # generated_tokens = nltk.word_tokenize(generated) if generated else []
    # try:
    #     bleu = nltk.translate.bleu_score.sentence_bleu(reference_tokens, generated_tokens)
    # except:
    #     bleu = 0.0
    reference_tokens = [list(jieba.cut(reference))] if reference else [[]]
    generated_tokens = list(jieba.cut(generated)) if generated else []
    try:
        bleu = sentence_bleu(
            reference_tokens,
            generated_tokens,
            smoothing_function=SmoothingFunction().method1
        )
    except ZeroDivisionError as e:
        print(f"BLEU 计算错误: {e}")
        bleu = 0.0

    # ROUGE 计算
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge_scores = scorer.score(reference or "", generated or "")
    rouge_1_f = rouge_scores['rouge1'].fmeasure
    rouge_L_f = rouge_scores['rougeL'].fmeasure

    return bleu, rouge_1_f, rouge_L_f


def main():
    api_key = "2b064694-cb3b-49e3-bb6e-55ef9ad8351f"  # 你的 API Key
    base_url = "https://ark.cn-beijing.volces.com/api/v3"
    model_name = "deepseek-r1-distill-qwen-7b-250120"
    chat_model = CHAT_MODEL(api_key=api_key, base_url=base_url, model_name=model_name)

    retrieval_json_path = "../retrieval_result/QA_ds/embedding_0.7(20).json"
    with open(retrieval_json_path, "r", encoding="utf-8") as f:
        retrieval_results = json.load(f)["results"]

    output_results = []

    for item in tqdm(retrieval_results, desc="Generating"):
        query = item.get("query", "")
        correct_answer = item.get("correct_answer", "")
        retrieved_docs = item.get("retrieved_documents", [])

        prompt = build_prompt(query, retrieved_docs)
        generated_answer = chat_model.chat(prompt)

        bleu, rouge_1_f, rouge_L_f = compute_metrics(generated_answer, correct_answer, retrieved_docs)
        
        output_results.append({
            "query": query,
            "reference_answer": correct_answer,
            "generated_answer": generated_answer,
            "source_docs": retrieved_docs,
            "metrics": {
                "bleu": bleu,
                "rouge_1_f": rouge_1_f,
                "rouge_L_f": rouge_L_f
            }
        })

    output_path = "./generation_result/QA_ds/generation_ds2.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # 计算平均指标
    total = len(output_results)
    avg_bleu = sum(item["metrics"]["bleu"] for item in output_results) / total
    avg_rouge1 = sum(item["metrics"]["rouge_1_f"] for item in output_results) / total
    avg_rougeL = sum(item["metrics"]["rouge_L_f"] for item in output_results) / total

    average_metrics = {
        "bleu": avg_bleu,
        "rouge_1_f": avg_rouge1,
        "rouge_L_f": avg_rougeL
    }

    final_output = {
        "average_metrics": average_metrics,
        "results": output_results
    }

    # 写入文件
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(final_output, f, ensure_ascii=False, indent=2)

    # 控制台打印
    print(f"生成完成，保存至 {output_path}")
    print("\n平均指标：")
    for k, v in average_metrics.items():
        print(f"{k}: {v:.4f}")


    print(f"生成完成，保存至 {output_path}")


if __name__ == "__main__":
    main()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\36325\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Generating:   0%|          | 0/222 [00:00<?, ?it/s]Building prefix dict from the default dictionary ...
Loading model from cache D:\Temp\jieba.cache
Loading model cost 0.358 seconds.
Prefix dict has been built successfully.
Generating: 100%|██████████| 222/222 [21:35<00:00,  5.84s/it]

生成完成，保存至 ./generation_result/QA_ds/generation_ds2.json

平均指标：
bleu: 0.0715
rouge_1_f: 0.2520
rouge_L_f: 0.2481
生成完成，保存至 ./generation_result/QA_ds/generation_ds2.json





In [None]:
import json
from tqdm import tqdm
import os
from openai import OpenAI
import nltk
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import jieba
from bert_score import score as bert_score


class CHAT_MODEL:
    def __init__(self, api_key, base_url, model_name):
        self.llm = OpenAI(
            api_key=api_key,
            base_url=base_url
        )
        self.model_name = model_name

    def chat(self, user_prompt):
        completion = self.llm.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "user", "content": user_prompt},
            ],
        )
        response = completion.choices[0].message.content
        return response


def build_prompt(query, documents, max_docs=3):
    context = ""
    for i, doc in enumerate(documents[:max_docs]):
        content = doc["content"].strip().replace("\n", " ")
        metadata = doc.get("metadata", {})
        context += f"[文档{i + 1} - 来源: {metadata.get('source_file', '未知')}, 页码: {metadata.get('page_num', '未知')}]:\n{content}\n\n"

    prompt = f"""你是一名保险知识问答助手，请基于以下参考资料回答用户的问题。

【参考资料】
{context}

【问题】
{query}

【回答】
"""
    return prompt



# 初始化 ROUGE scorer
scorer_word = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
scorer_char = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=False)

def compute_metrics(generated, reference):
    # 空文本处理
    if not reference or not generated:
        return {
            "bleu": 0.0,
            "rouge_1_f": 0.0,
            "rouge_L_f": 0.0,
            "bert_score_f1": 0.0
        }

    # -------------------------------
    # 1. BLEU (jieba分词 + 平滑)
    reference_tokens = [list(jieba.cut(reference))]
    generated_tokens = list(jieba.cut(generated))
    try:
        bleu = sentence_bleu(
            reference_tokens,
            generated_tokens,
            smoothing_function=SmoothingFunction().method1
        )
    except ZeroDivisionError as e:
        print(f"BLEU 计算错误: {e}")
        bleu = 0.0

    # -------------------------------
    # 2. ROUGE (字级 + 词级 多粒度平均)
    # 字级（不切词）
    rouge_char = scorer_char.score(reference, generated)
    rouge_char_1_f = rouge_char['rouge1'].fmeasure
    rouge_char_L_f = rouge_char['rougeL'].fmeasure

    # 词级（jieba切词）
    ref_word = " ".join(jieba.cut(reference))
    gen_word = " ".join(jieba.cut(generated))
    rouge_word = scorer_word.score(ref_word, gen_word)
    rouge_word_1_f = rouge_word['rouge1'].fmeasure
    rouge_word_L_f = rouge_word['rougeL'].fmeasure

    # 多粒度融合（平均）
    rouge_1_f = (rouge_char_1_f + rouge_word_1_f) / 2
    rouge_L_f = (rouge_char_L_f + rouge_word_L_f) / 2

    # -------------------------------
    # 3. BERTScore (基于语义)
    P, R, F1 = bert_score([generated], [reference], lang='zh', rescale_with_baseline=True)
    bert_f1 = F1[0].item()

    # -------------------------------
    return bleu, rouge_1_f, rouge_L_f, bert_f1



def main():
    api_key = "2b064694-cb3b-49e3-bb6e-55ef9ad8351f"
    base_url = "https://ark.cn-beijing.volces.com/api/v3"
    model_name = "deepseek-r1-distill-qwen-7b-250120"
    chat_model = CHAT_MODEL(api_key=api_key, base_url=base_url, model_name=model_name)

    retrieval_json_path = "../retrieval_result/QA_ds/embedding_0.7(20).json"
    with open(retrieval_json_path, "r", encoding="utf-8") as f:
        retrieval_results = json.load(f)["results"]

    output_results = []

    for item in tqdm(retrieval_results, desc="Generating"):
        query = item.get("query", "")
        correct_answer = item.get("correct_answer", "")
        retrieved_docs = item.get("retrieved_documents", [])

        prompt = build_prompt(query, retrieved_docs)
        generated_answer = chat_model.chat(prompt)

        bleu, rouge_1_f, rouge_L_f, bert_score_f1 = compute_metrics(generated_answer, correct_answer)

        output_results.append({
            "query": query,
            "reference_answer": correct_answer,
            "generated_answer": generated_answer,
            "source_docs": retrieved_docs,
            "metrics": {
                "bleu": bleu,
                "rouge_1_f": rouge_1_f,
                "rouge_L_f": rouge_L_f,
                "bert_score_f1": bert_score_f1  
            }
        })

    output_path = "./generation_result/QA_ds/generation_ds2.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # 计算平均指标
    total = len(output_results)
    avg_bleu = sum(item["metrics"]["bleu"] for item in output_results) / total
    avg_rouge1 = sum(item["metrics"]["rouge_1_f"] for item in output_results) / total
    avg_rougeL = sum(item["metrics"]["rouge_L_f"] for item in output_results) / total
    avg_bert = sum(item["metrics"]["bert_score_f1"] for item in output_results) / total

    average_metrics = {
        "bleu": avg_bleu,
        "rouge_1_f": avg_rouge1,
        "rouge_L_f": avg_rougeL,
        "bert_score_f1": avg_bert
    }

    final_output = {
        "average_metrics": average_metrics,
        "results": output_results
    }

    # 写入文件
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(final_output, f, ensure_ascii=False, indent=2)

    # 控制台打印
    print(f"生成完成，保存至 {output_path}")
    print("\n平均指标：")
    for k, v in average_metrics.items():
        print(f"{k}: {v:.4f}")


    print(f"生成完成，保存至 {output_path}")


if __name__ == "__main__":
    main()


Generating: 100%|██████████| 222/222 [24:35<00:00,  6.64s/it]

生成完成，保存至 ./generation_result/QA_ds/generation_ds2.json

平均指标：
bleu: 0.0714
rouge_1_f: 0.2478
rouge_L_f: 0.2419
bert_score_f1: 0.3807
生成完成，保存至 ./generation_result/QA_ds/generation_ds2.json



