# 用微调后的模型对验证集进行推理

In [None]:
import os
import re
import pdb
import json
import torch
import shutil
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from collections import Counter

os.environ["CUDA_VISIBLE_DEVICES"] = "2"  # 只使用第一张显卡
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")  # 指定使用的设备

!nvidia-smi

In [None]:
# 加载 lora 权重
model_path = '/data/disk4/home/chenrui/ai-project/Qwen2-7B-Instruct'
lora_path = '/data/disk4/home/chenrui/ai-project/logical_reasoning/checkpoints/Qwen2_7B_instruct_lora_with_thinking/checkpoint-300'  # 这里改称你的 lora 输出对应 checkpoint 地址

# 加载模型
model = AutoModelForCausalLM.from_pretrained(model_path, device_map={"":"cuda:2"}, torch_dtype=torch.bfloat16, trust_remote_code=True).eval()
model = PeftModel.from_pretrained(model, model_id=lora_path).to(device)  # 将 lora 权重加进原模型
print(f"Model is on device: {next(model.parameters()).device}")

# 加载tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# 读取JSONL文件
data = []
input_file = '/data/disk4/home/chenrui/ai-project/logical_reasoning/data/input/tmp/validation_data.json'
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)  # 读取 JSON 数据

In [None]:
# Batch inference with majority voting; running and final accuracy are printed.
NUM_VOTES = 3       # number of sampled generations per question
REPORT_EVERY = 50   # print the running accuracy every N processed items
ANSWER_PATTERN = re.compile(r'[A-G]')  # option letters; compiled once, reused below

# Voting only helps if the generations can differ. The previous setting
# (do_sample=True with top_k=1) keeps only the single most likely token at each
# step, so every "vote" was the same greedy output. Use genuine sampling instead.
# max_length caps prompt tokens + generated tokens together.
gen_kwargs = {
    "max_length": 2500,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.8,
    "top_k": 20,
}


def extract_answer(text):
    """Return the last option letter (A-G) found in *text*.

    If *text* is not a string or contains no option letter, it is returned
    unchanged so that the caller can still compare or display it.
    """
    if not isinstance(text, str):
        return text
    letters = ANSWER_PATTERN.findall(text)
    return letters[-1] if letters else text


correct_count = 0
total_count = 0

for item in data:
    prompt = item['instruction'] + item['input']
    # Normalise the reference the same way as the predictions: a bare letter
    # stays as it is, and a reference that includes reasoning text is reduced
    # to its final option letter.
    # NOTE(review): assumes the reference answer is the last A-G letter in
    # item['output'] - confirm against the dataset format.
    expected_output = extract_answer(item['output'])

    messages = [
        {"role": "user", "content": prompt}
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(device)
    prompt_len = inputs['input_ids'].shape[1]

    # Collect one extracted answer per sampled generation
    outputs_list = []
    for i in range(NUM_VOTES):
        with torch.no_grad():
            generated = model.generate(**inputs, **gen_kwargs)
        # Drop the prompt tokens so only the newly generated text is decoded
        completion = tokenizer.decode(generated[0, prompt_len:], skip_special_tokens=True)
        output = extract_answer(completion)
        outputs_list.append(output)
        print(f"Model output {i}: {output}")

    # Majority vote; on a tie Counter keeps first-seen order, so the earliest
    # sampled answer wins
    final_output = Counter(outputs_list).most_common(1)[0][0]
    print(f"Final voted output: {final_output}, Expected output: {expected_output}")

    # Compare the voted prediction with the reference answer
    if final_output == expected_output:
        correct_count += 1
    total_count += 1

    # Periodic progress report
    if total_count % REPORT_EVERY == 0:
        accuracy = correct_count / total_count
        print(f"Processed {total_count} items, Current Accuracy: {accuracy:.2%}")

# Final accuracy (guarded so an empty validation set does not divide by zero)
if total_count:
    accuracy = correct_count / total_count
    print(f"Final Accuracy: {accuracy:.2%}")
else:
    print("No validation items were processed; accuracy is undefined.")

In [None]:
# Clear generated temporary files (disabled; note that `tmp_dir` is not defined in the cells shown here)
# if os.path.exists(tmp_dir):
#     shutil.rmtree(tmp_dir)
#     os.makedirs(tmp_dir)
#     print(f'The folder {tmp_dir} has been cleared.')
# else:
#     print(f'The folder {tmp_dir} does not exist.')


# # Merge the LoRA weights into the base model and save the result
# new_model_directory = "./merged_model_an"
# merged_model = model.merge_and_unload()

# # Save the weights in safetensors format, with each shard file no larger than 2 GB (2048 MB)
# merged_model.save_pretrained(new_model_directory, max_shard_size="2048MB", safe_serialization=True)