In [None]:
# Put the sibling chapter directory on sys.path so its modules can be imported.
import sys
import os

# The package root is taken to be two levels above the current working
# directory, so this depends on where the notebook kernel was started.
package_path = os.path.dirname(os.path.dirname(os.getcwd()))
file_path = os.path.join(package_path, "ch07", "03_model-evaluation")
print(file_path)
# Guard the append so re-running this cell does not stack duplicate entries.
if file_path not in sys.path:
    sys.path.append(file_path)

import torch

# Pick the compute device in priority order: CUDA, then MPS, then CPU.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

## 测试 OpenAI API (使用deepseek)

In [None]:
def run_chatgpt(prompt, client, model="gpt-4-turbo"):
    """Send a single user message through ``client`` and return the reply text.

    Args:
        prompt: Text sent as the only message, with role "user".
        client: Object exposing ``chat.completions.create``.
        model: Model name forwarded to the API call.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # The same temperature and seed are sent with every request.
    request = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.0,
        "seed": 123,
    }
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
from openai import OpenAI
from config_manager import config_manager

# All API settings are read from the 'api' section of the project config.
config = config_manager.config
api_config = config.get('api', {})
api_key = api_config.get('deepseek_api_key')
if not api_key:
    raise ValueError("需要提供API密钥")
# Model name to use for requests; falls back to 'deepseek-chat' when unset.
model = api_config.get('model', 'deepseek-chat')

# Initialise the OpenAI client so that it talks to the Deepseek API.
# The client constructor has no `model` parameter (passing one raises
# TypeError); the model name has to be supplied with each request instead.
client = OpenAI(
    api_key=api_key,
    base_url=api_config.get('deepseek_api_url'),
)

In [None]:
# Smoke test: send a trivial prompt to check that the API connection works.
prompt = "Respond with 'hello world' if you got this message."
# Pass the configured model explicitly; run_chatgpt would otherwise fall back
# to its "gpt-4-turbo" default instead of the model read from the config.
run_chatgpt(prompt, client, model=model)

## 加载评估数据并对模型回复打分

In [None]:
# Load the raw example data used for the evaluation.
import json
import os

data_dir = "../../ch07/03_model-evaluation/"
json_file = "eval-example-data.json"

# Decode as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(os.path.join(data_dir, json_file), "r", encoding="utf-8") as file:
    json_data = json.load(file)

print("Number of entries:", len(json_data))

In [None]:
# Display the first loaded entry to inspect which fields it carries.
json_data[0]

In [None]:
# Format one dataset entry into the instruction prompt text.
def format_input(entry):
    """Build the instruction prompt for one dataset entry.

    Args:
        entry: Dict with an 'instruction' key and an optional 'input' key.

    Returns:
        The prompt text: a fixed preamble, an "### Instruction:" section and,
        only when the entry has a non-empty 'input', an "### Input:" section.

    Raises:
        KeyError: If the entry has no 'instruction' key.
    """
    instruction_text = (
        "Below is an instruction that describes a task. Write a response that "
        "appropriately completes the request."
        f"\n\n### Instruction:\n{entry['instruction']}"
    )

    # A missing 'input' key is treated the same as an empty 'input' value.
    extra_input = entry.get('input')
    input_text = f"\n\n### Input:\n{extra_input}" if extra_input else ""

    return instruction_text + input_text

In [None]:
# Try the scoring prompt on the first few entries and print the raw replies.
for entry in json_data[:5]:
    prompt = (
        f"Given the input `{format_input(entry)}` "
        f"and correct output `{entry['output']}`, "
        f"score the model response `{entry['model 1 response']}`"
        f" on a scale from 0 to 100, where 100 is the best score. "
    )

    print("\nDataset response:")
    print(">>", entry['output'])
    print("\nModel response:")
    print(">>", entry["model 1 response"])
    print("\nScore:")
    # Use the configured model rather than run_chatgpt's "gpt-4-turbo" default.
    print(">>", run_chatgpt(prompt, client, model=model))
    print("\n-------------------------")

In [None]:
from tqdm import tqdm

def generate_model_scores(json_data, json_key, client, model=None):
    """Ask the chat model to grade one response field of every entry.

    Args:
        json_data: Iterable of entries; each needs an 'output' key, the key
            named by ``json_key``, and whatever ``format_input`` reads.
        json_key: Name of the entry field holding the response to grade.
        client: Chat client passed through to ``run_chatgpt``.
        model: Optional model name forwarded to ``run_chatgpt``. When None,
            ``run_chatgpt`` is called without it and uses its own default.

    Returns:
        List of integer scores. Replies that cannot be converted to an integer
        are printed and skipped, so the list may be shorter than ``json_data``.
    """
    # Only forward `model` when the caller chose one, so existing calls behave
    # exactly as before.
    extra_kwargs = {} if model is None else {"model": model}

    scores = []
    for entry in tqdm(json_data, desc="Scoring entries"):
        prompt = (
            f"Given the input `{format_input(entry)}` "
            f"and correct output `{entry['output']}`, "
            f"score the model response `{entry[json_key]}`"
            f" on a scale from 0 to 100, where 100 is the best score. "
            f"Respond with the number only."
        )

        score = run_chatgpt(prompt, client, **extra_kwargs)
        try:
            scores.append(int(score))
        except (ValueError, TypeError):
            # ValueError: reply is not a plain integer string.
            # TypeError: reply is None (int(None) does not raise ValueError).
            print(f"Error converting score to int: {score}")
            continue

    return scores

In [None]:
from pathlib import Path

# Score both response fields and report how many scores were obtained and
# their mean. The loop variable is deliberately not named `model`, so the
# model name read from the config earlier in the notebook is not overwritten.
# NOTE(review): generate_model_scores is called without a model name here, so
# run_chatgpt's default model is what gets requested — confirm that is intended.
for response_key in ("model 1 response", "model 2 response"):
    scores = generate_model_scores(json_data, response_key, client)
    print(f"\n{response_key}")
    print(f"Number of scores: {len(scores)} of {len(json_data)}")
    # Every reply may have failed to parse; avoid dividing by zero in that case.
    if scores:
        print(f"Average score: {sum(scores)/len(scores):.2f}\n")
    else:
        print("Average score: n/a (no valid scores)\n")

    # Save the scores (currently disabled)
    # save_path = Path(f"model-{response_key}-scores.json")
    # with open(save_path, "w") as f:
    #     json.dump(scores, f)