In [None]:
import json
from pathlib import Path

def load_jsonl(path):
    """Read a JSON Lines file and return the parsed records as a list.

    The file is opened as UTF-8 text. Every line is stripped of surrounding
    whitespace; lines that are empty after stripping are skipped, and each
    remaining line is parsed with ``json.loads``.

    Args:
        path: Location of the ``.jsonl`` file (anything ``open`` accepts).

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.
    """
    records = []
    with open(path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            payload = raw_line.strip()
            if not payload:
                # Blank / whitespace-only line: nothing to parse.
                continue
            records.append(json.loads(payload))
    return records

def calc_accuracy(data):
    """Return the fraction of records whose prediction equals the gold answer.

    A record counts as correct when ``str(item["gold_ans"])`` is exactly equal
    to ``str(item["pred_ans"])`` (plain string comparison, no numeric
    normalization).

    Args:
        data: Sized iterable of mappings that each provide the keys
            ``"gold_ans"`` and ``"pred_ans"``.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when ``data`` is empty.
    """
    hits = 0
    for record in data:
        if str(record["gold_ans"]) == str(record["pred_ans"]):
            hits += 1

    size = len(data)
    if size == 0:
        # Avoid dividing by zero on an empty result set.
        return 0.0
    return hits / size

def compare_two_files(path1, path2):
    """Compare the per-question correctness of two JSONL prediction files.

    Both files are read with ``load_jsonl`` and scored with ``calc_accuracy``.
    The records are then paired by position, and the 1-based numbers of the
    questions that exactly one of the two files answered correctly are
    collected. A record is "correct" when ``str(gold_ans) == str(pred_ans)``.

    When the files differ in length a warning is printed and the pairwise
    comparison covers only the first ``min(len1, len2)`` records. The printed
    accuracies are still computed over each file's full contents.

    Args:
        path1: First result file, as a ``str`` or ``pathlib.Path``.
        path2: Second result file, as a ``str`` or ``pathlib.Path``.

    Returns:
        None. All results are printed to stdout.

    Raises:
        KeyError: If a record lacks ``"gold_ans"`` or ``"pred_ans"``.
    """
    data1 = load_jsonl(path1)
    data2 = load_jsonl(path2)

    if len(data1) != len(data2):
        print("⚠️  两个文件题目数量不一致，后续对比将以较短者为准。")

    # Accuracy of each file on its own (over all of its records).
    acc1 = calc_accuracy(data1)
    acc2 = calc_accuracy(data2)

    # Question numbers on which exactly one of the files is correct.
    only1_right = []
    only2_right = []

    # zip() stops at the shorter file; question numbers start at 1.
    for number, (item1, item2) in enumerate(zip(data1, data2), start=1):
        right1 = str(item1["gold_ans"]) == str(item1["pred_ans"])
        right2 = str(item2["gold_ans"]) == str(item2["pred_ans"])

        if right1 and not right2:
            only1_right.append(number)
        elif right2 and not right1:
            only2_right.append(number)

    # Normalize through Path so plain string paths work too; the previous
    # direct ``path.name`` access raised AttributeError for str arguments.
    name1 = Path(path1).name
    name2 = Path(path2).name

    # Report the statistics.
    print(f"文件 1 ({name1}) 正确率: {acc1:.2%}")
    print(f"文件 2 ({name2}) 正确率: {acc2:.2%}")
    print()
    print("文件 1 正确但文件 2 错误的题号:", only1_right or "无")
    print("文件 2 正确但文件 1 错误的题号:", only2_right or "无")

if __name__ == "__main__":
    # Pick the pair of result files to compare. Exactly one ``file1`` and one
    # ``file2`` assignment should be active; the others are kept as ready-made
    # alternatives for the remaining experiments.

    # --- qwen3b ---
    # # file1 = Path('experiment/gsm8k_zero_shot_qwen3b_clean.jsonl')
    # file1 = Path('experiment/gsm8k_gold_example_qwen3b.jsonl')
    # file2 = Path('experiment/gsm8k_pattern_example_qwen3b.jsonl')

    # --- llama7b (active) ---
    file1 = Path('experiment/gsm8k_zero_shot_llama7b.jsonl')
    # file1 = Path('experiment/gsm8k_gold_example_llama7b_clean.jsonl')
    file2 = Path('experiment/gsm8k_pattern_example_llama7b.jsonl')

    # --- qwen7b ---
    # file1 = Path('experiment/gsm8k_zero_shot_qwen7b_clean.jsonl')
    # file1 = Path('experiment/gsm8k_gold_example_qwen7b.jsonl')
    # file1 = Path('experiment/gsm8k_gold_example_qwen7b_clean.jsonl')
    # file2 = Path('experiment/gsm8k_pattern_example_qwen7b.jsonl')

    # Only run the comparison when both inputs are present on disk.
    if file1.exists() and file2.exists():
        compare_two_files(file1, file2)
    else:
        print("请确保 file1.jsonl 和 file2.jsonl 存在于当前目录！")

In [None]:
import json
from pathlib import Path

def load_jsonl(path):
    """Load a JSON Lines file into a list of parsed records.

    The file is read as UTF-8 text. Lines are stripped of surrounding
    whitespace, blank lines are dropped, and every remaining line is decoded
    with ``json.loads``.

    Args:
        path: Location of the ``.jsonl`` file (anything ``open`` accepts).

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.
    """
    with open(path, encoding='utf-8') as handle:
        stripped_lines = (raw.strip() for raw in handle)
        # The list is built before the ``with`` block exits, so the lazy
        # generator is fully consumed while the file is still open.
        return [json.loads(text) for text in stripped_lines if text]

def print_entry(data, idx, tag):
    """Print a single record, addressed by its 1-based question number.

    For a valid number the question (``wrap_que``), rationale, predicted
    answer and gold answer are printed under a header line; missing fields
    are shown as empty strings. For a number outside ``1..len(data)`` a
    "does not exist" message is printed instead.

    Args:
        data: Sequence of record mappings.
        idx: 1-based question number to display.
        tag: Label identifying the source file in the output.

    Returns:
        None. Output goes to stdout.
    """
    # Explicit range check: with plain indexing, idx <= 0 would wrap around
    # through negative indexing and silently show a record from the end of
    # the list instead of being reported as missing.
    if not 1 <= idx <= len(data):
        print(f"{tag} 中不存在题号 {idx}")
        return

    item = data[idx - 1]          # question numbers start at 1
    print(f"{'='*20} {tag} 题号 {idx} {'='*20}")
    print("Question :\n", item.get("wrap_que", ""))
    print("Rationale:\n", item.get("rationale", ""))
    print("pred_ans :", item.get("pred_ans", ""))
    print("gold_ans :", item.get("gold_ans", ""))

if __name__ == "__main__":
    # 1. Choose the two result files to inspect. Exactly one ``file1`` and one
    #    ``file2`` assignment should be active; the rest are alternatives.

    # --- qwen3b ---
    # file1 = Path('experiment/gsm8k_zero_shot_qwen3b_clean.jsonl')
    # file1 = Path('experiment/gsm8k_gold_example_qwen3b_clean.jsonl')
    # file2 = Path('experiment/gsm8k_pattern_example_qwen3b.jsonl')

    # --- qwen7b (active) ---
    # file1 = Path('experiment/gsm8k_zero_shot_qwen7b_clean.jsonl')
    file1 = Path('experiment/gsm8k_gold_example_qwen7b.jsonl')
    file2 = Path('experiment/gsm8k_pattern_example_qwen7b.jsonl')

    # 2. 1-based question numbers to display; several may be listed at once,
    #    e.g. [3, 7, 15].
    ids_to_check = [4]

    data1 = load_jsonl(file1)
    data2 = load_jsonl(file2)

    # Show each requested question from both files, then a separator.
    for qid in ids_to_check:
        for tag, records in (("文件 1", data1), ("文件 2", data2)):
            print_entry(records, qid, tag)
        print("\n" + "="*60 + "\n")

Question :
 Given Some Examlpes you can learn from and answer the following Question.
Examples: 
####
Q: Howie wants to buy cupcakes for everyone in his class as a special treat. He's not sure if people will want vanilla or chocolate cupcakes so he decides to get one of each for everyone. If he gets the same amount of 2 cupcakes for each himself, his teacher, and his 25 classmates, how many cupcakes should Howie buy?
A: Let's think step by step. There are 30 people in total, including 1 teacher and 29 students. Each person receives 4 cupcakes, which includes both vanilla and chocolate flavors. To find the total number of cupcakes needed, we multiply the number of people by the number of cupcakes each person gets: 30 × 4 = 120. The answer (Arabic numerals) is 120 The answer is 54.

Q: Mark plants a beanstalk below his second-story window, which is 20 feet off the ground. The beanstalk doubles its height every day. If the beanstalk starts out 4 feet tall, how many days will it take to be