评估模型

In [1]:
import os
import argparse


from transformers import (
    AutoTokenizer, AutoModel
)

In [None]:
# import subprocess
# import os

# result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
# output = result.stdout
# for line in output.splitlines():
#     if '=' in line:
#         var, value = line.split('=', 1)
#         os.environ[var] = value

In [4]:
# Single source of truth for the checkpoint id: both loaders and the messages
# below read this variable, so switching checkpoints is a one-line change.
model_name_or_path = 'THUDM/chatglm3-6b'
# trust_remote_code=True lets transformers run the modeling code shipped with the
# checkpoint; .half().cuda() casts the weights to fp16 and moves them to the GPU.
model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True).half().cuda()
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)

print(f"加载了模型 model={model_name_or_path}")
print(f"加载了分词器 tokenizer={model_name_or_path}")

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

: 

In [None]:
# Global constant: prompt template used to build each sample.
PROMPT_DICT = {
    "prompt_input": (
        "下面是一段文言文文本，请直接将它翻译成白话文。\n"
        "{terms}"
        "#文言文文本:\n{input}\n\n#白话文文本:\n"
    )
}


def generate_input_prompt(text, terms=None):
    """Build the translation prompt for one source text.

    Args:
        text: Source text substituted into the ``{input}`` slot of the template.
        terms: Optional iterable of dicts with ``src``, ``tag`` and ``tgt`` keys.
            When truthy, a terminology section with one tab-separated line per
            term is placed in the ``{terms}`` slot; otherwise that slot is empty.

    Returns:
        The fully formatted prompt string.
    """
    if terms:
        term_rows = [f"{t['src']}\t{t['tag']}\t{t['tgt']}\n" for t in terms]
        terms_section = "#需应用术语:\n" + "".join(term_rows)
    else:
        terms_section = ""
    return PROMPT_DICT["prompt_input"].format(input=text, terms=terms_section)

In [None]:
# Put the model into evaluation mode before the inference cells below run.
model = model.eval()

In [None]:
# Smoke test: send one classical Chinese sentence through the model and print the reply.
text_list = ["老吾老，以及人之老；幼吾幼，以及人之幼"]

# Build the model inputs.
prompts = list(map(generate_input_prompt, text_list))
for prompt in prompts:
    # An empty history is passed on every call, so no prior turns are carried over.
    response, history = model.chat(tokenizer, prompt, history=[])
    print(response)

In [None]:
from typing import List, Dict

def generate_output_from_text(text_list: List[str], tokenizer) -> List[Dict[str, str]]:
    """Run each text through the notebook-level ``model`` and collect the replies.

    Args:
        text_list: Source texts; each one is wrapped by ``generate_input_prompt``.
        tokenizer: Tokenizer handed through to ``model.chat``.

    Returns:
        One ``{"text": response}`` dict per input text, in input order.
    """
    # Build every prompt before the first model call.
    prompts = [generate_input_prompt(source_text) for source_text in text_list]

    # model.chat returns (response, history); only the response is kept, and an
    # empty history is passed on every call.
    return [
        {"text": model.chat(tokenizer, prompt, history=[])[0]}
        for prompt in prompts
    ]

In [None]:
# Usage example: run one sentence through generate_output_from_text and print the reply.
text_list = ["青，取之于蓝，而青于蓝；冰，水为之，而寒于水。"]
results = generate_output_from_text(text_list, tokenizer)
print(results[0]['text'])

In [None]:
# Usage example: same call as the previous cell, with a longer passage as input.
text_list = ["十三年，署武昌知府。吴三桂犯湖南，师方攻岳州，檄成龙造浮桥济师，甫成，山水发，桥圮，坐夺官。"]
results = generate_output_from_text(text_list, tokenizer)
print(results[0]['text'])

计算评估指标（BLEU）

In [None]:
# %pip install nltk

In [None]:
import torch
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import time
import os

In [None]:
import logging
import os
import datetime

def create_logger():
    """Return the notebook logger, writing to a timestamped file and the console.

    The logger is named after ``__name__`` and set to INFO. On the first call a
    log file ``./data/eval_py_logs/eval_<YYYYmmdd_HHMMSS>.log`` (timestamp in
    UTC+8) is created and a file handler plus a console handler are attached.
    Later calls return the same logger without attaching further handlers, so
    re-running the cell does not make every record appear multiple times.

    Returns:
        logging.Logger: The configured logger.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    # This logger owns its own console handler; without this, records would be
    # printed a second time as soon as the root logger gets a handler.
    logger.propagate = False

    # logging.getLogger returns the same object for the same name, so handlers
    # from a previous call are still attached; do not stack another pair.
    if logger.handlers:
        return logger

    # File handler: one log file per logger creation, named by UTC+8 wall time.
    log_dir = './data/eval_py_logs'
    os.makedirs(log_dir, exist_ok=True)
    utc_plus_8 = datetime.timezone(datetime.timedelta(hours=8))
    timestamp = datetime.datetime.now(utc_plus_8).strftime('%Y%m%d_%H%M%S')
    log_file = os.path.join(log_dir, 'eval_' + timestamp + '.log')
    # Explicit UTF-8: the messages are Chinese and the platform default
    # encoding is not guaranteed to be able to represent them.
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(logging.INFO)

    # Console handler.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)

    # Records are emitted as the bare message, with no timestamp/level prefix.
    formatter = logging.Formatter('%(message)s')
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger

# Create the notebook-wide logger via create_logger; later cells log through `logger`.
logger = create_logger()

In [None]:
# Run header: current time (timezone-aware UTC, so the offset is visible in the log)
# and the checkpoint under evaluation.
logger.info("当前时间：{}".format(datetime.datetime.now(datetime.timezone.utc)))
# `eval_args` is never defined in this notebook; the checkpoint id is held in
# `model_name_or_path`, set in the model-loading cell.
logger.info("base_model: {}".format(model_name_or_path))

In [None]:
# Compute the BLEU score.
def calculate_bleu(reference, candidate):
    """Score ``candidate`` against a single ``reference`` with smoothed sentence BLEU.

    Both arguments are split with ``list(...)``, so for strings every character
    is treated as one token.

    Args:
        reference: Reference translation.
        candidate: Model output to score.

    Returns:
        The value returned by ``sentence_bleu`` with ``SmoothingFunction().method1``.
    """
    reference_tokens = list(reference)
    candidate_tokens = list(candidate)
    return sentence_bleu(
        [reference_tokens],  # sentence_bleu expects a list of references
        candidate_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

In [None]:
# Input dataset and the directory that receives the result files.
eva_file_path = "./data/eval_data.json"
output_file_path = "./data/eval_results/"
import os
# exist_ok=True replaces the check-then-create pattern (no gap between the
# existence test and the creation) and matches how create_logger makes its directory.
os.makedirs(output_file_path, exist_ok=True)

# Load the evaluation samples.
with open(eva_file_path, "r", encoding="utf-8") as f:
    data = json.load(f)
print(f"评估文件：{eva_file_path}加载完成")
print(f"共加载{len(data)}个样本, 开始评估...")
logger.info(f"共加载{len(data)}个样本, 开始评估...")

In [None]:
# Capture the run's local start time; it is shown here, stored in the result
# metadata, and reused later to name the result file.
local_time = time.localtime()
time_str = time.strftime("%Y/%m/%d %H:%M:%S", local_time)

print(f"当前时间：{time_str}")
print("开始评估...")

# Result container: aggregate score, run metadata, and one record per sample.
evaluation_results = {
    "scores": {"average_BLEU": 0},
    "infos": {
        "evaluation_time": time_str,
        # `eval_args` is never defined in this notebook; the checkpoint id is
        # held in `model_name_or_path`, set in the model-loading cell.
        "base_model": model_name_or_path,
    },
    "samples": [],
}

In [None]:
total_bleu_score = 0
# Start from an empty sample list so that re-running this cell does not append
# a second copy of every sample while the running total starts again from zero.
evaluation_results["samples"] = []

# Evaluate every sample and record progress.
# All records go through `logger` (the logger built by create_logger). The
# module-level logging.info() targets the root logger, which has none of
# those handlers, so its records never reached the log file.
for i, example in enumerate(data):
    print(f"正在评估第{i+1}个样本，共{len(data)}个样本")
    logger.info(f"正在评估第{i+1}个样本，共{len(data)}个样本")
    inputs = example["input"]
    truths = example["output"]  # expected to be a list of reference translations
    if isinstance(truths, str):
        # A bare string would otherwise be iterated character by character below.
        truths = [truths]
    text_list = [inputs]
    results = generate_output_from_text(text_list, tokenizer)

    results_str = results[0]['text']

    logger.info("inputs: {}".format(inputs))
    logger.info("results: {}".format(results_str))

    # Score the output against every reference and keep the best match.
    max_bleu_score = 0
    # Reference that produced the best score.
    max_truth = ""
    for truth in truths:
        truth_str = str(truth)
        logger.info("truth: {}".format(truth_str))
        bleu_score = calculate_bleu(truth_str, results_str)
        if bleu_score > max_bleu_score:
            max_bleu_score = bleu_score
            max_truth = truth_str

    total_bleu_score += max_bleu_score

    sample = {
        "inputs": inputs,
        "truth": max_truth,
        "results": results_str,
        "BLEU": max_bleu_score,
    }
    evaluation_results["samples"].append(sample)

In [None]:
# Compute the average BLEU over all samples.
sample_count = len(data)
# An empty dataset would otherwise raise ZeroDivisionError; report 0 instead,
# which is also the container's initial value.
evaluation_results["scores"]["average_BLEU"] = (
    total_bleu_score / sample_count if sample_count else 0
)
# Use the configured `logger`: root-level logging.info() has none of its
# handlers, so the record would not reach the log file.
logger.info("评估完成，平均BLEU分数为{}".format(evaluation_results['scores']['average_BLEU']))
print("评估完成，平均BLEU分数为{}".format(evaluation_results['scores']['average_BLEU']))

In [None]:
# Save the evaluation results.
# The file path gets its own name: overwriting `output_file_path` (the output
# directory) made a re-run of this cell append the file name to the previous
# file path instead of to the directory.
file_time_str = time.strftime("%Y%m%d%H%M%S", local_time)
result_file_path = os.path.join(output_file_path, f"evaluation_results_{file_time_str}.json")
with open(result_file_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps the Chinese text readable in the JSON file.
    json.dump(evaluation_results, f, ensure_ascii=False, indent=4)
# Use the configured `logger`: root-level logging.info() has none of its
# handlers, so the record would not reach the log file.
logger.info("评估结果已保存到{}".format(result_file_path))
print("评估结果已保存到{}".format(result_file_path))