# C语言Bug类型分类

本文档定义了C语言代码中常见的bug类型ID及其对应的描述。

In [None]:
# Lookup table from bug-type ID to its description (based on the actual dataset).
# Data source: bug_source_code/metadata.json
# Every entry is an (ID, "Category: English name - Chinese name") pair; the
# order of the pairs is the iteration order of the resulting dict.
_BUG_TYPE_ENTRIES = (
    # Category A: signature errors (function-signature related)
    ("A.1", "Signature: Incorrect Function Usage - 函数使用不当"),
    ("A.2", "Signature: Fault Input Type - 错误的输入类型"),
    ("A.3", "Signature: Incorrect Function Return Value - 函数返回值错误"),
    ("A.4", "Signature: Incorrect Variable Usage - 变量使用不当"),
    # Category B: sanitizer errors
    ("B", "Sanitizer: Control Expression Error - 控制表达式错误"),
    # Category C: memory errors
    ("C.1", "Memory Error: Null Pointer Dereference - 空指针解引用"),
    ("C.2", "Memory Error: Uncontrolled Resource Consumption - 资源消耗失控"),
    ("C.3", "Memory Error: Memory Overflow - 内存溢出"),
    # Category D: logic-organization errors
    ("D.1", "Logic Organization: Improper Condition Organization - 条件组织不当"),
    ("D.2", "Logic Organization: Wrong Function Call Sequence - 函数调用顺序错误"),
)

BUG_TYPE_MAPPING = dict(_BUG_TYPE_ENTRIES)

In [None]:
# Render every bug type as a two-column table
import pandas as pd

# One record per mapping entry, in the mapping's own order; each record pairs
# the column headers with the (ID, description) tuple.
column_names = ("Bug类型ID", "描述")
rows = [dict(zip(column_names, entry)) for entry in BUG_TYPE_MAPPING.items()]
df = pd.DataFrame(rows)

# Title line followed by an 80-character rule.
print("C语言Bug类型分类表：", "=" * 80, sep="\n")
# A bare expression on the last line of a notebook cell is displayed as the
# cell's output.
df

In [None]:
# Helper that resolves a bug-type ID to its description
def get_bug_description(bug_id: str) -> str:
    """Look up the description registered for a bug-type ID.

    Args:
        bug_id: Bug-type ID, e.g. "A.1", "B" or "C.1".

    Returns:
        The matching value from ``BUG_TYPE_MAPPING``, or the placeholder
        string "未知Bug类型" when the ID is not a key of the mapping.
    """
    try:
        return BUG_TYPE_MAPPING[bug_id]
    except KeyError:
        # Unknown IDs are reported with a placeholder instead of raising.
        return "未知Bug类型"

# Smoke test: print the description of one ID from three different categories
for sample_id in ("A.1", "C.1", "D.1"):
    print(get_bug_description(sample_id))

## Bug类型统计示例

以下是实际数据集中各bug类型的分布统计（共245个bug样本），并以柱状图的形式展示。

In [None]:
# Bug-type distribution in the actual dataset (245 bug samples in total),
# listed from most to least frequent.
bug_statistics = {
    "D.1": 66,  # Logic Organization: Improper Condition Organization
    "B": 64,    # Sanitizer: Control Expression Error
    "A.4": 25,  # Signature: Incorrect Variable Usage
    "D.2": 20,  # Logic Organization: Wrong Function Call Sequence
    "A.3": 19,  # Signature: Incorrect Function Return Value
    "A.1": 19,  # Signature: Incorrect Function Usage
    "A.2": 12,  # Signature: Fault Input Type
    "C.2": 9,   # Memory Error: Uncontrolled Resource Consumption
    "C.1": 6,   # Memory Error: Null Pointer Dereference
    "C.3": 5,   # Memory Error: Memory Overflow
}

# Visualization
import matplotlib.pyplot as plt

# The axis labels and the title are Chinese, and Matplotlib's default
# DejaVu Sans font has no CJK glyphs, so they would be drawn as empty boxes.
# Put commonly installed CJK-capable fonts ahead of the current sans-serif
# candidates; the previous candidates stay in the list as fallbacks. The
# filter keeps the list free of duplicates when the cell is re-run.
cjk_fonts = [
    "SimHei",
    "Microsoft YaHei",
    "PingFang SC",
    "Noto Sans CJK SC",
    "WenQuanYi Micro Hei",
    "Arial Unicode MS",
]
plt.rcParams["font.sans-serif"] = cjk_fonts + [
    name for name in plt.rcParams["font.sans-serif"] if name not in cjk_fonts
]

# Tick labels use only the "Category: English name" part of each description
# (everything before the " - " separator).
bug_types = [get_bug_description(bid).split(' - ')[0] for bid in bug_statistics]
counts = list(bug_statistics.values())
# Derive the sample total from the data so the title and the summary below
# cannot drift away from the table above.
total_bugs = sum(counts)
positions = range(len(bug_types))

plt.figure(figsize=(14, 7))
plt.bar(positions, counts, color='steelblue')
plt.xticks(positions, bug_types, rotation=45, ha='right', fontsize=9)
plt.xlabel('Bug类型', fontsize=12)
plt.ylabel('数量', fontsize=12)
plt.title(f'C语言Bug类型分布统计 (共{total_bugs}个样本)', fontsize=14, fontweight='bold')
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

print(f"\n总Bug数量: {total_bugs}")
print(f"Bug类型数: {len(bug_statistics)}")