In [2]:
from pathlib import Path
import json

# Locations of the reference documents, the example questions and the
# matching ground truths.
reference_folder = Path("../source/競賽資料集/reference_text")
question_json_path = Path("../source/競賽資料集/dataset/preliminary/questions_example.json")

groundtruth_json_path = Path("../source/競賽資料集/dataset/preliminary/ground_truths_example.json")

# Decode explicitly as UTF-8: the files hold non-ASCII text and open()'s
# default encoding depends on the platform locale.
with open(question_json_path, "r", encoding="utf-8") as f:
    questions = json.load(f)["questions"]

with open(groundtruth_json_path, "r", encoding="utf-8") as f:
    groundtruths = json.load(f)["ground_truths"]

# One JSON file per run; sorted() already returns a list and gives a stable order.
answer_json_paths = sorted(Path("../output").glob("answer_v*.json"))

# Maps the file stem of each run to its list of answer records.
answer_jsons = {}
for answer_json_path in answer_json_paths:
    with open(answer_json_path, "r", encoding="utf-8") as f:
        answer_jsons[answer_json_path.stem] = json.load(f)["answers"]

# Number of answer files that were loaded.
len(answer_jsons)


27

In [3]:
# For every loaded answer file, collect the qids whose "retrieve" value
# differs from the ground truth.
errors = {}
for answer_json_path, answers in answer_jsons.items():
    # zip() stops at the shortest input, so a short or long answer file would
    # silently skew the error count; fail loudly instead.
    assert len(questions) == len(answers) == len(groundtruths), (
        f'{answer_json_path}: length mismatch (questions={len(questions)}, '
        f'answers={len(answers)}, ground truths={len(groundtruths)})'
    )
    errors[answer_json_path] = []
    for question, answer, groundtruth in zip(questions, answers, groundtruths):
        # The three lists must be aligned record by record.
        assert question["qid"] == answer["qid"] == groundtruth["qid"]
        if answer["retrieve"] != groundtruth["retrieve"]:
            errors[answer_json_path].append(question["qid"])

for answer_json_path, error_qids in errors.items():
    print(f'{answer_json_path}: {len(error_qids)}')

# Intersection: qids that every run gets wrong.
# Union: qids that at least one run gets wrong.
error_sets = [set(error_qids) for error_qids in errors.values()]
intersection_errors = set.intersection(*error_sets) if error_sets else set()
union_errors = set.union(*error_sets) if error_sets else set()

print(f'intersection error QIDs: {intersection_errors}')
# Only the size of the union is printed, not the qids themselves.
print(f'union error QIDs: {len(union_errors)}')


answer_v2_bm25_ckip: 32
answer_v2_bm25_clean_ckip: 29
answer_v2_bm25_clean_jieba: 34
answer_v2_bm25_jieba: 41
answer_v3_clean_embedding_bge-large-zh-v1.5_500_128: 29
answer_v3_clean_embedding_bge-m3_4096_3072: 37
answer_v3_clean_embedding_bge-m3_512_128: 31
answer_v3_clean_sparse_embedding_bge-m3_4096_3072: 31
answer_v4_clean_rerank_bge-reranker-v2-m3_2048_512: 31
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.03: 25
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.05: 21
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.08: 22
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.1: 21
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.12: 21
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.2: 24
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.3: 27
answer_v5_clean_shallow_fusion_ckip_bge-m3_4096_3072_alpha_0.01: 24
answer_v5_clean_shallow_fusio

In [5]:
# Find the pair of runs whose error-qid sets differ the most, measured by the
# size of their symmetric difference.
error_sets = {path: set(qids) for path, qids in errors.items()}
max_diff = 0
max_pair = (None, None)
max_diff_set = set()

# Symmetric difference does not depend on operand order, so each unordered
# pair only needs to be examined once: compare every entry with the entries
# that follow it. With the strict ">" below, the first pair that reaches the
# maximum still wins, exactly as when all ordered pairs are scanned.
named_sets = list(error_sets.items())
for index, (path1, set1) in enumerate(named_sets):
    for path2, set2 in named_sets[index + 1:]:
        diff_set = set1.symmetric_difference(set2)
        if len(diff_set) > max_diff:
            max_diff = len(diff_set)
            max_pair = (path1, path2)
            max_diff_set = diff_set

print(f'Max difference between {max_pair[0]} and {max_pair[1]}: {max_diff}')
print(f'Difference QIDs: {max_diff_set}')

Max difference between answer_v2_bm25_ckip and answer_v3_clean_embedding_bge-m3_4096_3072: 45
Difference QIDs: {2, 4, 6, 8, 10, 11, 12, 15, 16, 144, 19, 23, 30, 38, 40, 42, 43, 47, 49, 51, 53, 58, 59, 64, 66, 67, 68, 69, 70, 72, 73, 75, 76, 79, 89, 90, 92, 93, 94, 95, 96, 97, 98, 100, 124}


In [31]:
# Sizes of the union and of the intersection of the error-qid sets of the
# two runs named below.
first_run_errors = set(errors['answer_v2_bm25_ckip'])
second_run_errors = set(errors['answer_v3_clean_embedding_bge-m3_4096_3072'])
print(len(first_run_errors | second_run_errors))
print(len(first_run_errors & second_run_errors))


57
12


In [12]:
# Report, for every (answer_v2 run, answer_v5 run) pair, which error qids are
# unique to each side, which are shared, and the overall symmetric difference.
for path1, set1 in error_sets.items():
    for path2, set2 in error_sets.items():
        # Keep only pairs of distinct runs where the first name contains
        # 'answer_v2' and the second contains 'answer_v5'.
        if path1 == path2 or not ('answer_v2' in path1 and 'answer_v5' in path2):
            continue
        print(f'compare {path1} and {path2}')

        set1_unique_errors = set1 - set2
        print(f'{path1} unique errors: {len(set1_unique_errors)}')
        print(f'Unique QIDs in {path1}: {set1_unique_errors}')
        set2_unique_errors = set2 - set1
        print(f'{path2} unique errors: {len(set2_unique_errors)}')
        print(f'Unique QIDs in {path2}: {set2_unique_errors}')

        # Errors made by both runs.
        common_errors = set1 & set2
        print(f'Common errors: {len(common_errors)}')
        print(f'Common QIDs: {common_errors}')

        diff_set = set1.symmetric_difference(set2)

        print(f'difference between {path1} and {path2}: {len(diff_set)}')
        print(f'Difference QIDs: {diff_set}')

        print('========================================')
        print()


compare answer_v2_bm25_ckip and answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.03
answer_v2_bm25_ckip unique errors: 16
Unique QIDs in answer_v2_bm25_ckip: {97, 66, 3, 68, 6, 40, 10, 12, 76, 144, 124, 82, 24, 89, 58, 92}
answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.03 unique errors: 9
Unique QIDs in answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.03: {2, 67, 69, 37, 75, 77, 79, 53, 93}
Common errors: 16
Common QIDs: {98, 35, 99, 135, 72, 73, 109, 50, 19, 51, 61, 86, 94, 59, 29, 62}
difference between answer_v2_bm25_ckip and answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.03: 25
Difference QIDs: {2, 3, 6, 10, 12, 144, 24, 37, 40, 53, 58, 66, 67, 68, 69, 75, 76, 77, 79, 82, 89, 92, 93, 97, 124}

compare answer_v2_bm25_ckip and answer_v5_clean_shallow_fusion_ckip_bge-large-zh-v1.5_500_128_alpha_0.05
answer_v2_bm25_ckip unique errors: 17
Unique QIDs in answer_v2_bm25_ckip: {97, 66, 3, 68, 6, 40, 10,

In [20]:
# For one run, print every wrong answer (query, ground-truth reference text and
# retrieved reference text) and mirror the same report into "<run name>.txt".
answer_name = 'answer_v5_clean_shallow_fusion_ckip_bge-m3_512_128_alpha_0.1'
answers = answer_jsons[answer_name]

correct_count = 0
# The with-block guarantees the report file is flushed and closed even if a
# reference file is missing; UTF-8 is explicit because the text is non-ASCII.
with open(f'{answer_name}.txt', 'w', encoding="utf-8") as f:

    def report(line):
        """Print `line` and append it, newline-terminated, to the report file."""
        print(line)
        f.write(f'{line}\n')

    for question, answer, groundtruth in zip(questions, answers, groundtruths):
        # The three lists must be aligned record by record.
        assert question["qid"] == answer["qid"] == groundtruth["qid"]
        if answer["retrieve"] == groundtruth["retrieve"]:
            correct_count += 1
            continue

        category = question["category"]
        report('========================================')
        report(f'qid {question["qid"]} is wrong')
        report(f'category: {category}, ground truth: {groundtruth["retrieve"]}, model output: {answer["retrieve"]}')
        report('---------- query ----------')
        report(question["query"])

        # Dump the expected document first, then the one the run retrieved.
        for label, doc_id in (("ground truth", groundtruth["retrieve"]), ("answer", answer["retrieve"])):
            report(f'---------- {label} reference {category}/{doc_id} ----------')
            with open(reference_folder / f'{category}/{doc_id}.txt', "r", encoding="utf-8") as fp:
                reference_text = fp.read()
            print(reference_text)
            # The document text is written verbatim, without an extra newline.
            f.write(reference_text)

print(f'correct count: {correct_count}')


qid 4 is wrong
category: insurance, ground truth: 186, model output: 179
---------- query ----------
本契約內容的變更應經由誰同意並批註？
---------- ground truth reference insurance/186 ----------
**page 0**
南山人壽威美鑽美元利率變動型終身壽險（定期給付型）_SYUL
保險公司者，不得對抗保險公司。
前項受益人的變更，於要保人檢具申請書及被保險人的同意書（要、被保險人為
同一人時為申請書或電子申請文件）送達本公司時，本公司應即予批註或發給批
註書。
受益人同時或先於被保險人本人身故，除要保人已另行指定受益人外，以被保險
人身故時之法定繼承人為本契約受益人。
前項法定繼承人之順序及應得保險金之比例適用民法繼承編相關規定。
第三十二條 變更住所
要保人的住所有變更時，應即以書面或其他約定方式通知本公司。
要保人不為前項通知者，本公司之各項通知，得以本契約所載要保人之最後住所
發送之。
第三十三條 時效
由本契約所生的權利，自得為請求之日起，經過兩年不行使而消滅。
第三十四條 批註
本契約內容的變更，或記載事項的增刪，除第三十一條規定者外，應經要保人與
本公司雙方書面或其他約定方式同意，並由本公司即予批註或發給批註書。
第三十五條 管轄法院
因本契約涉訟者，同意以要保人住所地地方法院為第一審管轄法院，要保人的住
所在中華民國境外時，以本公司總公司所在地地方法院為第一審管轄法院。但不
得排除消費者保護法第四十七條及民事訴訟法第四百三十六條之九小額訴訟管
轄法院之適用。
第 12 頁，共 16 頁


---------- answer reference insurance/179 ----------
**page 0**
南山人壽添保倍美元利率變動型終身壽險（定期給付型）_BYUPL8
二、於保險事故發生前經被保險人同意變更受益人，如要保人未將前述變更通知
保險公司者，不得對抗保險公司。
前項受益人的變更，於要保人檢具申請書及被保險人的同意書（要、被保險人為
同一人時為申請書或電子申請文件）送達本公司時，本公司應即予批註或發給批
註書。
「身故保險金」或喪葬費用保險金受益人同時或先於被保險人本人身故，

In [17]:
# NOTE(review): hard-coded ratio; presumably an accuracy (131 correct answers
# out of 150 questions) copied from an earlier "correct count" — TODO confirm.
131/150

0.8733333333333333