# 使用真实数据集测试Bug检测器

本notebook演示如何使用bug_source_code数据集中的真实bug样本来测试DSPy代码bug检测功能。

## 1. 环境设置

In [None]:
import dspy
import os
import json
import random
from code_bug_detector import CodeBugDetector, BUG_TYPE_MAPPING

In [None]:
# Configure the DSPy language model
lm = dspy.LM(
    model='anthropic/claude-3-5-sonnet-20241022',
    # The key is read from the environment; os.getenv returns None when
    # ANTHROPIC_API_KEY is not set, and no check is made here.
    api_key=os.getenv('ANTHROPIC_API_KEY'),
    max_tokens=4000
)

# Hand the model to DSPy.
# NOTE(review): presumably this makes it the default LM used by
# CodeBugDetector -- confirm against code_bug_detector.
dspy.configure(lm=lm)
print("✓ DSPy模型配置完成")

In [None]:
# Create the bug detector instance; test_bug_detection calls this
# notebook-level `detector` object directly.
detector = CodeBugDetector()
print("✓ Bug检测器已初始化")

## 2. 加载数据集

In [None]:
# Load the dataset metadata. JSON text is UTF-8 by specification, so decode
# it explicitly instead of relying on the platform's locale default encoding.
with open('bug_source_code/metadata.json', 'r', encoding='utf-8') as f:
    bugs_metadata = json.load(f)

print(f"✓ 成功加载 {len(bugs_metadata)} 个bug样本")
print("\n数据集统计:")
print(f"  - 总Bug数: {len(bugs_metadata)}")

# Count how many samples each bug type id has.
bug_type_counts = {}
for bug in bugs_metadata:
    bug_type = bug['bug_type_id']
    bug_type_counts[bug_type] = bug_type_counts.get(bug_type, 0) + 1

# Print the distribution ordered by bug type id; ids that are missing from
# BUG_TYPE_MAPPING are shown with the "未知" placeholder description.
print("\nBug类型分布:")
for bug_type_id, count in sorted(bug_type_counts.items()):
    desc = BUG_TYPE_MAPPING.get(bug_type_id, "未知")
    print(f"  {bug_type_id}: {count:3d} - {desc}")

## 3. 辅助函数

In [None]:
def _read_source_file(relative_path):
    """Return the text of a file located under ``bug_source_code/``."""
    # Decode explicitly as UTF-8 rather than with the locale default, so the
    # result does not depend on the platform. Undecodable bytes are replaced
    # with U+FFFD so one stray byte in a sample does not abort the load.
    with open(f"bug_source_code/{relative_path}", 'r',
              encoding='utf-8', errors='replace') as f:
        return f.read()


def load_bug_code(bug_metadata):
    """
    Load the buggy and fixed source code for one bug sample.

    Args:
        bug_metadata: Metadata entry for the sample. Must provide
            ``files['buggy_function']`` and ``files['fixed_function']``
            (paths relative to ``bug_source_code/``) plus the ``bug_id``,
            ``bug_type_id``, ``bug_type``, ``project_name`` and ``location``
            keys.

    Returns:
        (buggy_code, fixed_code, bug_info): the two file contents as text and
        a summary dict with the keys ``bug_id``, ``bug_type_id``,
        ``bug_type``, ``project`` and ``location``.

    Raises:
        KeyError: if a required metadata key is missing.
        OSError: if one of the source files cannot be opened.
    """
    # Resolve both file names before touching the disk so that a missing key
    # is reported before any file is opened.
    files = bug_metadata['files']
    buggy_file = files['buggy_function']
    fixed_file = files['fixed_function']

    buggy_code = _read_source_file(buggy_file)
    fixed_code = _read_source_file(fixed_file)

    bug_info = {
        'bug_id': bug_metadata['bug_id'],
        'bug_type_id': bug_metadata['bug_type_id'],
        'bug_type': bug_metadata['bug_type'],
        'project': bug_metadata['project_name'],
        'location': bug_metadata['location'],
    }

    return buggy_code, fixed_code, bug_info


def test_bug_detection(bug_metadata, verbose=True):
    """
    Run the detector on a single bug sample, optionally printing a report.

    Args:
        bug_metadata: Metadata entry for the sample, as accepted by
            ``load_bug_code``.
        verbose: When true, print a header for the sample, the first 500
            characters of the buggy code and the formatted detection result.

    Returns:
        (result, bug_info): the detector output for the buggy code and the
        summary dict produced by ``load_bug_code``.
    """
    preview_limit = 500
    banner = '=' * 80

    # Only the buggy version is fed to the detector; the fixed code is unused.
    buggy_code, _fixed_code, bug_info = load_bug_code(bug_metadata)

    if verbose:
        report_lines = [
            f"\n{banner}",
            f"测试Bug: {bug_info['bug_id']}",
            f"项目: {bug_info['project']}",
            f"真实Bug类型: {bug_info['bug_type_id']} - {bug_info['bug_type']}",
            banner,
            "\nBuggy代码:",
            buggy_code[:preview_limit],  # preview only
        ]
        if len(buggy_code) > preview_limit:
            report_lines.append("...")
        for line in report_lines:
            print(line)

    # Run the detection
    result = detector(buggy_code)

    if verbose:
        print("\n检测结果:")
        print(detector.format_output(result))

    return result, bug_info

## 4. 测试单个Bug样本

In [None]:
# Pick one bug sample at random and run the detector on it
# (random.choice raises IndexError if bugs_metadata is empty).
random_bug = random.choice(bugs_metadata)
result, bug_info = test_bug_detection(random_bug)

## 5. 测试特定类型的Bug

In [None]:
# Test a null-pointer dereference sample (C.1)
c1_bugs = [entry for entry in bugs_metadata if entry['bug_type_id'] == 'C.1']

# Report when the dataset has no such sample; otherwise test the first one.
if not c1_bugs:
    print("未找到C.1类型的bug")
else:
    print(f"找到 {len(c1_bugs)} 个空指针解引用bug")
    result, bug_info = test_bug_detection(c1_bugs[0])

In [None]:
# Test a logic-organization error sample (D.1)
d1_bugs = [entry for entry in bugs_metadata if entry['bug_type_id'] == 'D.1']

# Report when the dataset has no such sample; otherwise test the first one.
if not d1_bugs:
    print("未找到D.1类型的bug")
else:
    print(f"找到 {len(d1_bugs)} 个逻辑组织错误bug")
    result, bug_info = test_bug_detection(d1_bugs[0])

## 6. 批量测试（可选）

In [None]:
# Group the samples by bug type id in a single pass, keeping dataset order
# inside each group.
bugs_by_type = {}
for bug in bugs_metadata:
    bugs_by_type.setdefault(bug['bug_type_id'], []).append(bug)

# Draw one random sample for every known bug type that has at least one
# entry, visiting the types in BUG_TYPE_MAPPING order.
test_samples = {
    type_id: random.choice(bugs_by_type[type_id])
    for type_id in BUG_TYPE_MAPPING
    if type_id in bugs_by_type
}

print(f"准备测试 {len(test_samples)} 个不同类型的bug样本")
print("注意：这会调用多次API，可能需要一些时间...\n")

In [None]:
# Run the batch test (uncomment to run)
# results = []
# for bug_type_id, bug_metadata in test_samples.items():
#     print(f"\n测试 {bug_type_id} 类型...")
#     result, bug_info = test_bug_detection(bug_metadata, verbose=False)
#     results.append({
#         'expected_type': bug_type_id,
#         'detected': result['has_bug'],
#         'bug_info': bug_info
#     })
#
# # Summarize the results
# # NOTE(review): if test_samples is empty, results stays empty and the
# # detection-rate line below divides by len(results) == 0.
# detected_count = sum(1 for r in results if r['detected'])
# print(f"\n检测结果汇总:")
# print(f"  总测试数: {len(results)}")
# print(f"  检测到bug: {detected_count}")
# print(f"  检测率: {detected_count/len(results)*100:.1f}%")

## 7. 查看具体Bug示例

In [None]:
# Show the details of the first bug in the dataset
first_bug = bugs_metadata[0]
buggy_code, fixed_code, bug_info = load_bug_code(first_bug)

print(f"Bug ID: {bug_info['bug_id']}")
print(f"Bug类型: {bug_info['bug_type_id']} - {bug_info['bug_type']}")
print(f"项目: {bug_info['project']}")

# Print the buggy and the fixed version, each framed by a dashed rule.
for heading, source in (("\nBuggy代码:", buggy_code), ("\nFixed代码:", fixed_code)):
    print(heading, "-" * 80, source, "-" * 80, sep="\n")

## 8. 自定义测试

In [None]:
# Select a specific bug by its bug_id and test it
bug_id_to_test = "CESNET___libyang_1_09abf888"  # change to the bug_id you want to test

# Take the first metadata entry whose id matches, or None when there is none.
matching_bugs = (entry for entry in bugs_metadata if entry['bug_id'] == bug_id_to_test)
bug_to_test = next(matching_bugs, None)

if not bug_to_test:
    print(f"未找到Bug ID: {bug_id_to_test}")
else:
    result, bug_info = test_bug_detection(bug_to_test)