### Easy Generate CFG

#### LLM

效果不太好，还是需要我们一步步进行处理！

step1 先将文件的嵌套类，方法给找到

In [4]:
import json
import os

from concurrent.futures import as_completed, ThreadPoolExecutor
from multiprocessing import cpu_count
from functools import partial

# 这里假设你有一个自己封装的 get_llm_answers 函数
# 请根据实际情况导入
from llm import get_llm_answers

from tqdm import tqdm


def get_step1_prompt(code_text: str, program_language: str):
    """
    生成第一步的 Prompt，用于让大模型识别所有类和函数（以及嵌套关系）。
    注意，这里去掉了原先的三重反引号。
    """
    code_lines = code_text.splitlines()
    code_lines_json = [{
        "line": i + 1,
        "code": line
    } for i, line in enumerate(code_lines)]

    prompt = f"""
You are given a piece of {program_language} code. Your goal is to find all the nested classes and methods in the code.

Please return the result in JSON format, your output should be the following format:

{{
    "name": "example_script",  // Name of the script or function
    "type": "CFG",
    "start_line": number,
    "end_line": number,
    "functions": [
      {{
        "name": "function_name",
        "type": "function",
        "start_line": number,
        "end_line": number,
        "functions": [],         // Nested functions
        "classes": []            // Nested classes
      }}
    ],
    "classes": [
      {{
        "name": "class_name",
        "type": "class",
        "start_line": number,
        "end_line": number,
        "functions": [           // Methods of the class
          {{
            "name": "method_name",
            "type": "function",
            "start_line": number,
            "end_line": number,
            "functions": [],     // Nested functions
            "classes": []        // Nested classes
          }}
        ]
      }}
    ]
}}

The code lines are:
{json.dumps(code_lines_json, indent=2)}

IMPORTANT: Make sure that the nested classes and methods are in the correct level. For example, if a function is nested in another class, the function should be in the nested class's functions list.
Besides, if a class is nested in another class, the class should be in the nested class's classes list.
"""
    return prompt.strip()


def find_nested_classes_and_methods(code_text: str, program_language: str):
    """
    调用 LLM，让其识别文件中的嵌套类、函数，并返回 JSON 结构。
    """
    prompt = get_step1_prompt(code_text, program_language)
    response = get_llm_answers(prompt, model_name="gpt-4o", require_json=True)
    nested_classes_and_methods = json.loads(response)
    return nested_classes_and_methods


def process_file_with_chain_of_thought(code_text: str, program_language: str):
    """
    读取代码文本 -> 让大模型找出所有嵌套类/函数 -> 返回 JSON 结构
    """
    step1_result = find_nested_classes_and_methods(code_text, program_language)
    return step1_result


def get_code_by_line_range(code_block, code):
    """
    取出 code_block 代表的行范围(start_line~end_line)，并且排除其内部嵌套类/函数的所有行。
    最终保留一组 { "lineno", "line" } 数组，方便后续做 CFG 时保留原始行号。
    """
    code_lines = code.splitlines()
    start_line = code_block["start_line"]
    # 这里根据实际情况决定 end_line 是否 +1
    end_line = code_block["end_line"]

    # 先把区间内每行加入集合
    line_set = set(range(start_line, end_line + 1))

    # 从集合中排除掉所有嵌套类/函数的行
    for func in code_block.get("functions", []):
        func_start_line = func.get("start_line", 0)
        func_end_line = func.get("end_line", 0)
        line_set.difference_update(range(func_start_line, func_end_line + 1))

    for cls in code_block.get("classes", []):
        cls_start_line = cls.get("start_line", 0)
        cls_end_line = cls.get("end_line", 0)
        line_set.difference_update(range(cls_start_line, cls_end_line + 1))

    # 剩余行号排序后，保存 { lineno, line } 到 simplified_code
    ordered_lines = sorted(line_set)
    simplified_code_array = []
    for lineno in ordered_lines:
        if 1 <= lineno <= len(code_lines):
            line_content = code_lines[lineno - 1]
        else:
            line_content = ""
        simplified_code_array.append({
            "lineno": lineno,
            "line": line_content
        })

    # 将数组存进 code_block 中
    code_block["simplified_code"] = simplified_code_array


def recursive_get_code_by_line_range(code_block, code):
    """
    递归地为当前块及其所有子类、子函数，计算并存储 simplified_code（带原始行号）。
    """
    get_code_by_line_range(code_block, code)
    for func in code_block.get("functions", []):
        recursive_get_code_by_line_range(func, code)
    for cls in code_block.get("classes", []):
        recursive_get_code_by_line_range(cls, code)


def print_simplified_code(code_block: dict, indent=0):
    """
    递归打印 simplified_code 的内容（仅用于调试或查看），保留行号和内容。
    """
    prefix = " " * indent
    simplified_lines = code_block.get("simplified_code", [])
    print(prefix + "简化后的代码 (行号 -> 内容):")
    for item in simplified_lines:
        print(prefix + f"{item['lineno']:4d}: {item['line']}")

    # 递归处理嵌套的类
    for class_block in code_block.get("classes", []):
        print(prefix + f"\n类 {class_block.get('name', '')}:")
        print_simplified_code(class_block, indent + 2)

    # 递归处理嵌套的函数
    for function_block in code_block.get("functions", []):
        print(prefix + f"\n函数 {function_block.get('name', '')}:")
        print_simplified_code(function_block, indent + 2)


def get_code_cfg_prompt(line_array, program_language):
    """
    给 LLM 的 Prompt，要求其基于该 line_array 生成 CFG 并返回 JSON。
    line_array 的格式形如:
      [
        {"lineno": 10, "line": "def foo():"},
        {"lineno": 11, "line": "..."},
        ...
      ]

    去掉了三重反引号。
    """
    code_as_json = json.dumps(line_array, indent=2)
    prompt = f"""
You will be given a piece of {program_language} code in the form of a JSON array. Each element has two fields:
  - "lineno": the original line number in the code
  - "line": the actual code text on that line

Your goal is to generate a Control Flow Graph (CFG) for this code and output the result as JSON. Here are the specific requirements:

1. Input Format:
   The code is presented as a JSON array of objects, each with "lineno" (int) and "line" (string). For example:

{code_as_json}

(This is the code you need to analyze.)

2. Definition of Basic Blocks:
   - A basic block can contain one or more “continuous and unbranched” statements.
   - Whenever you encounter a statement that causes a flow jump or branch (e.g., if-else, for-while, try-except-finally, with-as, match-case, break-continue-return, etc.), you should start a new basic block.

3. JSON Output Structure:
   Your output must strictly follow this JSON format, with no additional text or explanation:

"blocks": [
  {{
    "id": 1,
    "start_line": 1,
    "end_line": 1,
    "label": "... code of block ...",
    "successors": [
      {{
        "id": 2,
        "start_line": 2,
        "end_line": 3,
        "label": "... code of block ...",
        "successors": [...]
      }}
    ]
  }}
]

   - id: an integer starting from 1, incrementing by 1 for each block.
   - start_line: the first line number (from the input) that belongs to this block.
   - end_line: the last line number (from the input) that belongs to this block.
   - label: the exact code snippet (all lines) inside this block, unchanged from the input lines.
   - successors: a list of nested blocks that may execute after this block. Each item in this list is itself a block with the same structure: "id", "start_line", "end_line", "label", and "successors".

4. Branch Structures:
   - if-else: for if condition: ... else: ..., both the if body and the else body should be separate blocks. The if block’s "successors" should include both branches as nested block objects.
   - for-while: the loop body and the statement(s) following the loop should be in different blocks, with correct flow back to the loop condition if it continues, or forward to the next block if it terminates.
   - try-except-finally: each try, except, and finally block should be identified separately, showing normal and exceptional flows in successors.
   - with-as: the code inside the with statement and the code after the with block should be separate blocks.
   - match-case: treat each case body as a separate nested block in successors.
   - break-continue-return: these statements jump to outside of the loop, back to the loop condition, or end the function. If the function ends, successors can be an empty list.

5. Final Output:
   - Ensure your output is valid JSON (only one root object, containing "blocks").
   - Do not add extra text or explanation—only the JSON object itself.
   - Each block's start_line and end_line must map correctly back to the lineno values in the input JSON array.

Your task: Parse the input line-array, identify all basic blocks with correct start_line, end_line, and label, then produce a single JSON object with the structure above.
"""
    return prompt.strip()


def get_single_block_cfg(code_block, program_language):
    """
    调用 LLM 获取当前 code_block 的 CFG。这里的 simplified_code 是行号和文本的数组。
    """
    line_array = code_block.get("simplified_code", [])
    if not line_array:
        code_block["blocks"] = []
        return

    prompt = get_code_cfg_prompt(line_array, program_language)
    response = get_llm_answers(prompt, model_name="gpt-4o", require_json=True)
    blocks_json = json.loads(response)
    code_block["blocks"] = blocks_json.get("blocks", [])


def recursive_get_each_block_cfg(code_block, program_language):
    """
    递归获取每个代码块（文件级、类级、函数级）的 CFG。
    """
    get_single_block_cfg(code_block, program_language)
    for cls in code_block.get("classes", []):
        recursive_get_each_block_cfg(cls, program_language)
    for func in code_block.get("functions", []):
        recursive_get_each_block_cfg(func, program_language)


def main():
    """
    演示入口。根据实际需求修改 source_code_dir 和文件范围等。
    这里仅示例对 python 文件进行处理，并将结果输出到 JSON。
    """
    source_code_dir = "../../dataset/python"
    target_dir = "llm_cfg_with_line_no"
    os.makedirs(target_dir, exist_ok=True)

    # 示例：我们只处理 0.py ~ 199.py 这 200 个文件
    files = []
    for i in range(200):
        py_file = os.path.join(source_code_dir, f"{i}.py")
        out_file = os.path.join(target_dir, f"{i}.json")
        files.append((py_file, out_file))

    def process_single_file(source_file, target_file):
        if not os.path.exists(source_file):
            return
        if os.path.exists(target_file):
            # 如果目标文件已存在，可以选择跳过，或覆盖，按需决定
            return

        print("Processing", source_file)
        with open(source_file, 'r', encoding='utf-8') as f:
            code = f.read()

        # Step 1: 让大模型找出所有类 / 函数（包含嵌套）
        step1_result = process_file_with_chain_of_thought(code, "python")

        # Step 2: 给每个类 / 函数（及顶层）提取 simplified code（排除嵌套代码行，但保留原始行号）
        recursive_get_code_by_line_range(step1_result, code)

        # Step 3: 对每个简化后的代码块，调用 LLM 生成 CFG
        recursive_get_each_block_cfg(step1_result, "python")

        # 可选：去重逻辑，避免出现重复的 blocks
        def remove_duplicate_blocks(code_block):
            """
            删除同一层级中 (start_line, end_line) 相同的重复块，仅保留最前面一个
            """
            if "blocks" in code_block:
                seen = set()
                unique_blocks = []
                for blk in code_block["blocks"]:
                    s_line = blk.get("start_line", -1)
                    e_line = blk.get("end_line", -1)
                    key = (s_line, e_line)
                    if key not in seen:
                        seen.add(key)
                        unique_blocks.append(blk)
                code_block["blocks"] = unique_blocks

            for sub_cls in code_block.get("classes", []):
                remove_duplicate_blocks(sub_cls)
            for sub_func in code_block.get("functions", []):
                remove_duplicate_blocks(sub_func)

        remove_duplicate_blocks(step1_result)

        # 输出到 JSON
        with open(target_file, "w", encoding="utf-8") as fout:
            json.dump(step1_result, fout, indent=2, ensure_ascii=False)

    # 多线程处理所有文件（可单线程执行以更好查看输出）
    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        futures = [executor.submit(process_single_file, src, tgt) for src, tgt in files]
        for _ in tqdm(as_completed(futures), total=len(files), desc="处理CFG文件"):
            pass

    # process_single_file(*files[0])


if __name__ == "__main__":
    main()


Processing ../../dataset/python/1.py
Processing ../../dataset/python/2.py
Processing ../../dataset/python/3.py
Processing ../../dataset/python/4.py
Processing ../../dataset/python/5.py
Processing ../../dataset/python/6.py
Processing ../../dataset/python/7.py
Processing ../../dataset/python/8.py
Processing ../../dataset/python/9.py
Processing ../../dataset/python/10.py
Processing ../../dataset/python/12.py
Processing ../../dataset/python/11.py
Processing ../../dataset/python/13.py
Processing ../../dataset/python/14.py
Processing ../../dataset/python/15.py
Processing ../../dataset/python/16.py
Processing ../../dataset/python/17.py
Processing ../../dataset/python/18.py
Processing ../../dataset/python/19.py
Processing ../../dataset/python/20.py
Processing ../../dataset/python/21.py
Processing ../../dataset/python/22.py
Processing ../../dataset/python/23.py
Processing ../../dataset/python/24.py
Processing ../../dataset/python/25.py
Processing ../../dataset/python/26.py
Processing ../../data

处理CFG文件:   2%|▏         | 3/200 [00:10<11:40,  3.56s/it]

Processing ../../dataset/python/114.py
Processing ../../dataset/python/115.py


处理CFG文件:   2%|▎         | 5/200 [00:13<08:02,  2.47s/it]

Processing ../../dataset/python/116.py


处理CFG文件:   3%|▎         | 6/200 [00:15<07:21,  2.28s/it]

Processing ../../dataset/python/117.py


处理CFG文件:   4%|▎         | 7/200 [00:16<06:29,  2.02s/it]

Processing ../../dataset/python/118.py
Processing ../../dataset/python/119.py


处理CFG文件:   4%|▍         | 9/200 [00:16<03:47,  1.19s/it]

Processing ../../dataset/python/120.py


处理CFG文件:   5%|▌         | 10/200 [00:17<03:39,  1.16s/it]

Processing ../../dataset/python/121.py


处理CFG文件:   6%|▌         | 11/200 [00:18<03:06,  1.01it/s]

Processing ../../dataset/python/122.py


处理CFG文件:   6%|▌         | 12/200 [00:20<04:07,  1.31s/it]

Processing ../../dataset/python/123.py


处理CFG文件:   6%|▋         | 13/200 [00:20<03:17,  1.05s/it]

Processing ../../dataset/python/124.py
Processing ../../dataset/python/125.py


处理CFG文件:   8%|▊         | 16/200 [00:22<02:06,  1.46it/s]

Processing ../../dataset/python/126.py
Processing ../../dataset/python/127.py


处理CFG文件:   8%|▊         | 17/200 [00:22<01:58,  1.54it/s]

Processing ../../dataset/python/128.py


处理CFG文件:   9%|▉         | 18/200 [00:23<02:27,  1.23it/s]

Processing ../../dataset/python/129.py


处理CFG文件:  10%|█         | 21/200 [00:26<01:58,  1.51it/s]

Processing ../../dataset/python/130.py
Processing ../../dataset/python/131.py
Processing ../../dataset/python/132.py


处理CFG文件:  11%|█         | 22/200 [00:26<01:51,  1.60it/s]

Processing ../../dataset/python/133.py


处理CFG文件:  12%|█▏        | 24/200 [00:28<02:01,  1.44it/s]

Processing ../../dataset/python/134.py
Processing ../../dataset/python/135.py
Processing ../../dataset/python/136.py


处理CFG文件:  13%|█▎        | 26/200 [00:29<01:42,  1.69it/s]

Processing ../../dataset/python/137.py


处理CFG文件:  14%|█▍        | 28/200 [00:29<01:11,  2.39it/s]

Processing ../../dataset/python/138.py
Processing ../../dataset/python/139.py


处理CFG文件:  14%|█▍        | 29/200 [00:30<01:00,  2.81it/s]

Processing ../../dataset/python/140.py
Processing ../../dataset/python/141.py


处理CFG文件:  16%|█▌        | 31/200 [00:30<01:02,  2.69it/s]

Processing ../../dataset/python/142.py


处理CFG文件:  16%|█▌        | 32/200 [00:31<01:00,  2.78it/s]

Processing ../../dataset/python/143.py


处理CFG文件:  16%|█▋        | 33/200 [00:31<01:12,  2.30it/s]

Processing ../../dataset/python/144.py


处理CFG文件:  17%|█▋        | 34/200 [00:32<01:18,  2.11it/s]

Processing ../../dataset/python/145.py


处理CFG文件:  18%|█▊        | 35/200 [00:33<01:48,  1.52it/s]

Processing ../../dataset/python/146.py


处理CFG文件:  18%|█▊        | 37/200 [00:34<01:15,  2.15it/s]

Processing ../../dataset/python/147.py
Processing ../../dataset/python/148.py


处理CFG文件:  20%|█▉        | 39/200 [00:34<00:59,  2.73it/s]

Processing ../../dataset/python/149.py
Processing ../../dataset/python/150.py
Processing ../../dataset/python/151.py


处理CFG文件:  20%|██        | 41/200 [00:35<01:09,  2.29it/s]

Processing ../../dataset/python/152.py


处理CFG文件:  21%|██        | 42/200 [00:36<01:04,  2.44it/s]

Processing ../../dataset/python/153.py


处理CFG文件:  22%|██▏       | 43/200 [00:37<01:26,  1.81it/s]

Processing ../../dataset/python/154.py


处理CFG文件:  22%|██▏       | 44/200 [00:37<01:42,  1.51it/s]

Processing ../../dataset/python/155.py
Processing ../../dataset/python/156.py


处理CFG文件:  23%|██▎       | 46/200 [00:38<01:09,  2.21it/s]

Processing ../../dataset/python/157.py


处理CFG文件:  24%|██▎       | 47/200 [00:38<01:06,  2.31it/s]

Processing ../../dataset/python/158.py


处理CFG文件:  24%|██▍       | 48/200 [00:38<00:57,  2.64it/s]

Processing ../../dataset/python/159.py


处理CFG文件:  25%|██▌       | 50/200 [00:40<01:05,  2.29it/s]

Processing ../../dataset/python/160.py
Processing ../../dataset/python/161.py
Processing ../../dataset/python/162.py


处理CFG文件:  26%|██▌       | 52/200 [00:42<02:05,  1.18it/s]

Processing ../../dataset/python/163.py


处理CFG文件:  26%|██▋       | 53/200 [00:44<02:45,  1.13s/it]

Processing ../../dataset/python/164.py


处理CFG文件:  27%|██▋       | 54/200 [00:45<02:12,  1.11it/s]

Processing ../../dataset/python/165.py


处理CFG文件:  28%|██▊       | 55/200 [00:46<02:46,  1.15s/it]

Processing ../../dataset/python/166.py


处理CFG文件:  28%|██▊       | 56/200 [00:49<03:30,  1.46s/it]

Processing ../../dataset/python/167.py


处理CFG文件:  28%|██▊       | 57/200 [00:50<03:30,  1.47s/it]

Processing ../../dataset/python/168.py


处理CFG文件:  29%|██▉       | 58/200 [00:50<02:40,  1.13s/it]

Processing ../../dataset/python/169.py


处理CFG文件:  30%|██▉       | 59/200 [00:51<02:19,  1.01it/s]

Processing ../../dataset/python/170.py


处理CFG文件:  30%|███       | 60/200 [00:52<02:09,  1.08it/s]

Processing ../../dataset/python/171.py


处理CFG文件:  30%|███       | 61/200 [00:53<02:00,  1.15it/s]

Processing ../../dataset/python/172.py


处理CFG文件:  31%|███       | 62/200 [00:53<01:38,  1.40it/s]

Processing ../../dataset/python/173.py


处理CFG文件:  32%|███▏      | 63/200 [00:54<01:33,  1.47it/s]

Processing ../../dataset/python/174.py
Processing ../../dataset/python/175.py


处理CFG文件:  32%|███▎      | 65/200 [00:54<01:09,  1.94it/s]

Processing ../../dataset/python/176.py


处理CFG文件:  33%|███▎      | 66/200 [00:55<01:04,  2.09it/s]

Processing ../../dataset/python/177.py


处理CFG文件:  34%|███▎      | 67/200 [00:55<01:12,  1.85it/s]

Processing ../../dataset/python/178.py


处理CFG文件:  34%|███▍      | 68/200 [00:57<01:58,  1.12it/s]

Processing ../../dataset/python/179.py


处理CFG文件:  34%|███▍      | 69/200 [00:58<01:52,  1.17it/s]

Processing ../../dataset/python/180.py


处理CFG文件:  35%|███▌      | 70/200 [00:58<01:32,  1.41it/s]

Processing ../../dataset/python/181.py


处理CFG文件:  36%|███▌      | 71/200 [00:59<01:34,  1.36it/s]

Processing ../../dataset/python/182.py


处理CFG文件:  36%|███▋      | 73/200 [01:01<01:31,  1.39it/s]

Processing ../../dataset/python/183.py
Processing ../../dataset/python/184.py


处理CFG文件:  37%|███▋      | 74/200 [01:01<01:12,  1.75it/s]

Processing ../../dataset/python/185.py
Processing ../../dataset/python/186.py


处理CFG文件:  38%|███▊      | 76/200 [01:02<01:16,  1.61it/s]

Processing ../../dataset/python/187.py


处理CFG文件:  38%|███▊      | 77/200 [01:04<02:00,  1.02it/s]

Processing ../../dataset/python/188.py


处理CFG文件:  39%|███▉      | 78/200 [01:05<01:44,  1.17it/s]

Processing ../../dataset/python/189.py
Processing ../../dataset/python/190.py


处理CFG文件:  40%|████      | 80/200 [01:06<01:15,  1.59it/s]

Processing ../../dataset/python/191.py


处理CFG文件:  40%|████      | 81/200 [01:06<01:09,  1.71it/s]

Processing ../../dataset/python/192.py


处理CFG文件:  41%|████      | 82/200 [01:07<01:19,  1.49it/s]

Processing ../../dataset/python/193.py


处理CFG文件:  42%|████▏     | 83/200 [01:10<02:47,  1.44s/it]

Processing ../../dataset/python/195.py
Processing ../../dataset/python/194.py


处理CFG文件:  43%|████▎     | 86/200 [01:12<01:32,  1.23it/s]

Processing ../../dataset/python/196.py
Processing ../../dataset/python/197.py
Processing ../../dataset/python/198.py
Processing ../../dataset/python/199.py


处理CFG文件: 100%|██████████| 200/200 [10:16<00:00,  3.08s/it]


### LLM生成的代码可能可以合并

In [5]:
import os
import json

def process_cfg(cfg):
    """
    Process a CFG that uses a *nested successors* structure.
    We will:
      1. Remove unreachable blocks (only keep blocks reachable from the root).
      2. Separate loop headers from loop bodies (if desired).
      3. Merge consecutive linear blocks that have only one successor and one predecessor.
      4. Recursively process functions/classes if they exist.
    """

    #=== 1. 过滤不可达节点: 我们假设 blocks[0] 是 CFG 的根节点 ===#
    def filter_connected_blocks(blocks):
        """
        Given a list of blocks (in nested form), return only those reachable
        from the 'root' block (which we assume is blocks[0]) by traversing
        nested successors.
        """

        visited_ids = set()
        # 为了方便在后面快速通过 id 找到对应的 block 对象，我们先做一个 {id: block} 的映射
        # 同时存储所有 block 的引用（因为是嵌套的，需要把内部 successors 里的 block 也加入到此映射）
        id_to_block = {}

        def collect_all_blocks(block_list):
            for b in block_list:
                id_to_block[b["id"]] = b
                if "successors" in b:
                    collect_all_blocks(b["successors"])

        collect_all_blocks(blocks)

        # 深度优先搜索，查找所有可达节点
        def dfs(block):
            if block["id"] in visited_ids:
                return
            visited_ids.add(block["id"])
            for succ_block in block.get("successors", []):
                dfs(succ_block)

        # 假定 blocks[0] 是 root
        if blocks:
            root_block = blocks[0]
            dfs(root_block)

        # 现在我们只保留被 visited_ids 覆盖到的节点，并且需要“剪枝”不在 visited_ids 中的后继
        def filter_nested(block_list):
            """在嵌套结构中移除不可达节点。"""
            filtered = []
            for b in block_list:
                if b["id"] in visited_ids:
                    # 递归处理 successors
                    new_successors = filter_nested(b.get("successors", []))
                    filtered.append({
                        "id": b["id"],
                        "label": b["label"],
                        "successors": new_successors
                    })
            return filtered

        return filter_nested(blocks)

    #=== 2. 判断循环头（示例仅以 "for" / "while" 关键字简单判断） ===#
    def is_loop_header(block):
        """
        A naive check: if the block's label starts with 'for' or 'while'
        (or contains those keywords in a relevant way), treat it as a loop header.
        """
        code_str = block["label"].strip()
        if code_str.startswith("for ") or code_str.startswith("while "):
            return True
        return False

    #=== 3. 合并逻辑（循环头和循环体暂时不做复杂拆分，仅演示思路） ===#
    # 在嵌套结构中，“连续的线性块”通常表现为一个 block 有且仅有 1 个 successor，且该 successor 只有该一个 predecessor。
    # 但是在嵌套结构里，我们无法简易地统计 predecessor 数量，需要自行设计。
    #
    # 示例逻辑：深度遍历 + 遇到 loop header 不合并；遇到多 successor 不合并；否则合并到下一个 block。
    def merge_blocks_in_place(block):
        """
        递归地合并一个 block 的线性后继。
        当遇到循环头或分支时，不再合并。
        """
        successors = block.get("successors", [])
        if not successors:
            # 无后继，直接返回
            return block

        # 如果存在多个 successor，说明是分支点，不合并任何后继
        if len(successors) > 1:
            # 递归处理每个 successor
            for i, succ in enumerate(successors):
                successors[i] = merge_blocks_in_place(succ)
            block["successors"] = successors
            return block

        # 如果只有 1 个 successor，则尝试合并
        single_succ = successors[0]
        if is_loop_header(block):
            # 如果当前 block 是 loop header，不向后合并，只是递归处理后继
            block["successors"][0] = merge_blocks_in_place(single_succ)
            return block
        if is_loop_header(single_succ):
            # 如果后继是 loop header，也不合并，只是递归处理后继
            block["successors"][0] = merge_blocks_in_place(single_succ)
            return block

        # 到这里，意味着我们可以把 single_succ 跟当前块合并
        block["label"] = block["label"] + "\n" + single_succ["label"]
        # 把 single_succ 的 successors 赋给当前块
        block["successors"] = single_succ.get("successors", [])

        # 递归处理“合并后”依然存在的后继（可能还是一个 list）
        if block["successors"]:
            new_succ_list = []
            for succ in block["successors"]:
                new_succ_list.append(merge_blocks_in_place(succ))
            block["successors"] = new_succ_list

        return block

    #=== 4. 针对最外层的 blocks 做处理 ===#
    #  4.1 过滤掉不可达节点
    if "blocks" in cfg:
        cfg["blocks"] = filter_connected_blocks(cfg["blocks"])

    #  4.2 合并块：因为是多 block，需要逐个处理，然后再把处理结果放回 cfg["blocks"] 
    #      同时，新的根块可能因为合并也会改变，所以我们需要重新搜集并替换
    if "blocks" in cfg and cfg["blocks"]:
        merged = []
        for b in cfg["blocks"]:
            merged_block = merge_blocks_in_place(b)
            merged.append(merged_block)
        cfg["blocks"] = merged

    #=== 5. 递归处理 functions 与 classes ===#
    if "functions" in cfg:
        for func in cfg["functions"]:
            process_cfg(func)

    if "classes" in cfg:
        for cls in cfg["classes"]:
            process_cfg(cls)

    return cfg

#=============================
# 下面是示例读取并处理文件的逻辑
#=============================
import os
import json

for file in os.listdir("llm_cfg_with_line_no"):
    path = os.path.join("llm_cfg_with_line_no", file)
    if not os.path.isfile(path):
        continue

    print("Processing", file)
    with open(path, "r", encoding="utf-8") as f:
        try:
            llm_cfg = json.load(f)
        except Exception as e:
            print(f"Error loading {file}: {str(e)}")
            continue

    try:
        process_cfg(llm_cfg)
    except Exception as e:
        print(f"Error processing {file}: {str(e)}")
        continue

    os.makedirs("merged_llm_cfg_with_line_no", exist_ok=True)
    output_path = os.path.join("merged_llm_cfg_with_line_no", file)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(llm_cfg, f, indent=2, ensure_ascii=False)


Processing 95.json
Error processing 95.json: 'label'
Processing 110.json
Processing 160.json
Processing 94.json
Processing 38.json
Processing 21.json
Error processing 21.json: 'list' object has no attribute 'strip'
Processing 187.json
Processing 121.json
Processing 72.json
Processing 132.json
Processing 67.json
Processing 149.json
Processing 147.json
Processing 135.json
Processing 4.json
Error processing 4.json: 'list' object has no attribute 'strip'
Processing 74.json
Processing 116.json
Processing 40.json
Processing 178.json
Processing 14.json
Processing 7.json
Processing 166.json
Processing 31.json
Processing 17.json
Processing 167.json
Processing 107.json
Processing 156.json
Processing 89.json
Processing 183.json
Processing 193.json
Processing 176.json
Processing 162.json
Processing 80.json
Processing 136.json
Processing 171.json
Processing 98.json
Processing 106.json
Processing 141.json
Processing 152.json
Processing 96.json
Processing 123.json
Processing 28.json
Processing 150.js

## Visualize

In [18]:
import os
import json
import networkx as nx
import matplotlib.pyplot as plt

# 如果你用 pygraphviz：
from networkx.drawing.nx_agraph import graphviz_layout
# 如果要用 pydot，请改为：
# from networkx.drawing.nx_pydot import pydot_layout

def flatten_cfg(blocks):
    """
    将嵌套结构的 CFG 转换为 (nodes, edges) 两个列表：
      - nodes: [(id, label), ...]
      - edges: [(id1, id2), ...] 表示从 id1 -> id2 的有向边
    """
    nodes = []
    edges = []

    def dfs(block):
        # 将当前块加入 nodes
        nodes.append((block["id"], block["label"]))
        # 遍历 successors
        for succ in block.get("successors", []):
            edges.append((block["id"], succ["id"]))
            dfs(succ)

    for b in blocks:
        dfs(b)

    # 去重
    nodes = list(dict.fromkeys(nodes))
    edges = list(dict.fromkeys(edges))
    return nodes, edges

def visualize_cfg_blocks(blocks, title="CFG", output_path=None, rankdir="TB"):
    """
    使用 Graphviz (dot) 分层布局，可视化嵌套CFG的 blocks，
    并画出带箭头的有向图。这里 label 显示完整，无省略。
    
    参数:
      - blocks: 一个 list，形如 [ {id, label, successors: [...]}, ...]
      - title: 生成图片的标题
      - output_path: 若指定则保存到该文件，否则 plt.show()
      - rankdir: "TB"（自上而下）或 "LR"（自左向右）
    """
    # 1) flatten
    nodes, edges = flatten_cfg(blocks)

    # 2) 构建有向图
    G = nx.DiGraph()
    for node_id, label in nodes:
        G.add_node(node_id, label=label)
    for src, dst in edges:
        G.add_edge(src, dst)

    # 3) 不再截断 label，全部显示
    labels_dict = {}
    for node_id, data in G.nodes(data=True):
        full_label = data["label"] or ""
        # 可以考虑在字符串中替换换行符，避免节点过于高
        # full_label = full_label.replace("\n", "\\n")
        # 或者直接原样显示（某些布局器可能会把换行符当作字符串，而不会自动换行）
        labels_dict[node_id] = f"{node_id}: {full_label}"

    # 4) 使用 dot 布局，指定 rankdir
    # prog="dot" 对应分层布局，-Grankdir 控制自上而下(TB)或自左向右(LR)
    pos = graphviz_layout(G, prog="dot", args=f"-Grankdir={rankdir}")

    # 5) 绘图 (带箭头)
    plt.figure(figsize=(14, 8))
    nx.draw_networkx_nodes(G, pos, node_color="lightblue", node_size=1800, edgecolors="black")
    nx.draw_networkx_edges(
        G, pos,
        arrows=True,
        arrowstyle="->",
        arrowsize=15,
        connectionstyle="arc3,rad=0.1"
    )
    nx.draw_networkx_labels(G, pos, labels=labels_dict, font_size=8)

    plt.title(title)
    plt.axis("off")

    if output_path:
        plt.savefig(output_path, dpi=150, bbox_inches="tight")
        plt.close()
        print(f"Saved figure: {output_path}")
    else:
        plt.show()

def visualize_cfg_separately(cfg_data, base_name="cfg", output_dir="visualized_llm_cfg", rankdir="TB"):
    """
    将同一个文件中的顶层 blocks、每个 function 的 blocks、每个 class 的 blocks
    分别绘制不同子图，存成多个文件。文件保存在 output_dir/base_name/ 下。
    
    参数:
      - cfg_data: 形如 {"blocks": [...], "functions": [...], "classes": [...]} 的字典
      - base_name: 例如 "0"、"1" 等文件名前缀
      - output_dir: 例如 "visualized_llm_cfg"
      - rankdir: "TB"（自上而下）或 "LR"（自左向右）
    """
    # 先创建子目录: visualized_llm_cfg/0/ 之类
    sub_dir = os.path.join(output_dir, base_name)
    os.makedirs(sub_dir, exist_ok=True)

    # 1) 绘制顶层 blocks
    if "blocks" in cfg_data and cfg_data["blocks"]:
        out_path = os.path.join(sub_dir, f"top.png")
        visualize_cfg_blocks(
            cfg_data["blocks"],
            title=f"{base_name}_top",
            output_path=out_path,
            rankdir=rankdir
        )

    # 2) 绘制每个 function
    if "functions" in cfg_data and cfg_data["functions"]:
        for i, func in enumerate(cfg_data["functions"], start=1):
            func_name = func.get("name", f"func_{i}")
            out_path = os.path.join(sub_dir, f"func_{func_name}.png")
            if "blocks" in func and func["blocks"]:
                visualize_cfg_blocks(
                    func["blocks"],
                    title=f"{base_name}_func_{func_name}",
                    output_path=out_path,
                    rankdir=rankdir
                )

    # 3) 绘制每个 class
    if "classes" in cfg_data and cfg_data["classes"]:
        for i, cls in enumerate(cfg_data["classes"], start=1):
            cls_name = cls.get("name", f"class_{i}")
            out_path = os.path.join(sub_dir, f"class_{cls_name}.png")
            if "blocks" in cls and cls["blocks"]:
                visualize_cfg_blocks(
                    cls["blocks"],
                    title=f"{base_name}_class_{cls_name}",
                    output_path=out_path,
                    rankdir=rankdir
                )

def visualize_cfg_file_separately(file_path, output_dir="visualized_llm_cfg", rankdir="TB"):
    """
    给定一个 JSON 文件，分顶层/函数/类分别绘图并保存。
    目标目录: output_dir/<文件名前缀>/...
    """
    with open(file_path, "r", encoding="utf-8") as f:
        llm_cfg = json.load(f)

    base_name = os.path.splitext(os.path.basename(file_path))[0]
    visualize_cfg_separately(
        llm_cfg,
        base_name=base_name,
        output_dir=output_dir,
        rankdir=rankdir
    )

# ====== 示例：批量处理 "merged_llm_cfg" 文件夹下的所有 JSON 文件 ====== #
if __name__ == "__main__":
    folder = "merged_llm_cfg"
    for file_name in os.listdir(folder):
        if not file_name.lower().endswith(".json"):
            continue
        path = os.path.join(folder, file_name)
        print("Processing:", file_name)
        try:
            # 用 rankdir="TB" 自上而下布局；可改成 rankdir="LR" 自左向右。
            visualize_cfg_file_separately(path, output_dir="visualized_llm_cfg", rankdir="TB")
        except Exception as e:
            print(f"Error processing {file_name}: {str(e)}")


Processing: 0.json
Saved figure: visualized_llm_cfg/0/top.png
Saved figure: visualized_llm_cfg/0/func_create_test_user.png
Saved figure: visualized_llm_cfg/0/func_create_test_graph.png
Saved figure: visualized_llm_cfg/0/func_sample_agent.png


### Compare CFG

In [1]:
import json
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import cpu_count
from typing import Dict, List, Tuple
from dataclasses import dataclass
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from tqdm import tqdm
from llm import get_llm_answers

def compare_cfg_similarity(llm_cfg, static_cfg):
    max_retries = 3
    retry_count = 0
    
    while retry_count < max_retries:
        try:
            prompt = f"""
You are a CFG evaluator to evaluate whether the generated CFG is correct based on the static CFG.

You should first compare the structure of the CFG, then compare the content of the CFG. Focus on the flow of the CFG and ignore the details such as content and block_id, block_name.

Your output should be a json with the following format:
{{
    "reasonable": true/false,
    "structure_similarity": 0.8,
    "content_similarity": 0.9,
    "total_similarity": 0.85,
    "reason": ""
}}

Ground truth:
{static_cfg}

Generated CFG:
{llm_cfg}
"""
            similarity = json.loads(get_llm_answers(prompt, model_name="gpt-4o", require_json=True))
            return similarity
            
        except Exception as e:
            retry_count += 1
            if retry_count == max_retries:
                raise e
            print(f"重试第{retry_count}次,错误信息:{str(e)}")

from dataclasses import dataclass
from typing import Dict, List, Optional, Union
import json
import numpy as np
from pathlib import Path

@dataclass
class CFGSimilarityResult:
    """存储CFG比较结果的数据类"""
    filename: str
    edge_coverage: float
    content_similarity: float
    structure_similarity: float
    matched_edges: int
    gt_edges: int
    llm_edges: int
    nested_results: Optional[Dict[str, 'CFGSimilarityResult']] = None
    llm_similarity: Optional[Dict[str, Union[float, bool]]] = None

class CFGComparator:
    def __init__(self):
        """初始化CFG比较器"""
        pass
    
    @staticmethod
    def count_edges(cfg: Dict) -> int:
        """递归计算CFG中的边数量"""
        edge_count = sum(
            len(block.get("successors", []))
            for block in cfg.get("blocks", [])
        )
        
        # 递归计算嵌套CFG的边
        for func in cfg.get("functions", []):  # functions是列表
            edge_count += CFGComparator.count_edges(func)
        for cls in cfg.get("classes", []):     # classes是列表
            edge_count += CFGComparator.count_edges(cls)
            
        return edge_count

    def structure_similarity(self, llm_cfg: Dict, static_cfg: Dict) -> float:
        """计算两个CFG的结构相似度"""
        # 获取两个CFG的blocks
        llm_blocks = llm_cfg.get("blocks", [])
        static_blocks = static_cfg.get("blocks", [])
        
        # 如果两个CFG都没有blocks，返回1.0
        if not llm_blocks and not static_blocks:
            return 1.0
        # 如果其中一个没有blocks，返回0.0
        if not llm_blocks or not static_blocks:
            return 0.0
            
        # 计算边的匹配度
        llm_edges = sum(len(block.get("successors", [])) for block in llm_blocks)
        static_edges = sum(len(block.get("successors", [])) for block in static_blocks)
        
        if llm_edges == 0 and static_edges == 0:
            return 1.0
        if llm_edges == 0 or static_edges == 0:
            return 0.0
            
        # 使用边数的比例计算相似度
        return min(llm_edges, static_edges) / max(llm_edges, static_edges)

    def content_similarity(self, llm_cfg: Dict, static_cfg: Dict) -> float:
        """计算两个CFG的内容相似度"""
        # 获取简化的代码内容
        llm_code = llm_cfg.get("simplified_code", "")
        static_code = static_cfg.get("simplified_code", "")
        
        # 如果两个都为空，返回1.0
        if not llm_code and not static_code:
            return 1.0
        # 如果其中一个为空，返回0.0
        if not llm_code or not static_code:
            return 0.0
        
        # 将代码分割成行并去除空白行
        llm_lines = [line.strip() for line in llm_code.split("\n") if line.strip()]
        static_lines = [line.strip() for line in static_code.split("\n") if line.strip()]
        
        # 计算行的匹配度
        common_lines = set(llm_lines) & set(static_lines)
        return len(common_lines) / max(len(llm_lines), len(static_lines))

    def compare_cfgs(self, llm_cfg: Dict, static_cfg: Dict, name: str) -> CFGSimilarityResult:
        """递归比较两个CFG并返回相似度结果"""
        # 计算当前层级的相似度
        structure_sim = self.structure_similarity(llm_cfg, static_cfg)
        content_sim = self.content_similarity(llm_cfg, static_cfg)
        
        # 计算边的统计信息
        gt_edges = self.count_edges(static_cfg)
        llm_edges = self.count_edges(llm_cfg)
        matched_edges = int(structure_sim * min(gt_edges, llm_edges))
        edge_coverage = matched_edges / gt_edges if gt_edges > 0 else 0
        
        # 递归比较嵌套的CFG
        nested_results = {}
        
        # 比较函数CFG
        llm_functions = {f["name"]: f for f in llm_cfg.get("functions", [])}
        static_functions = {f["name"]: f for f in static_cfg.get("functions", [])}
        common_functions = set(llm_functions.keys()) & set(static_functions.keys())
        
        for func_name in common_functions:
            nested_results[f"function_{func_name}"] = self.compare_cfgs(
                llm_functions[func_name],
                static_functions[func_name],
                func_name
            )
        
        # 比较类CFG
        llm_classes = {c["name"]: c for c in llm_cfg.get("classes", [])}
        static_classes = {c["name"]: c for c in static_cfg.get("classes", [])}
        common_classes = set(llm_classes.keys()) & set(static_classes.keys())
        
        for class_name in common_classes:
            nested_results[f"class_{class_name}"] = self.compare_cfgs(
                llm_classes[class_name],
                static_classes[class_name],
                class_name
            )
        
        return CFGSimilarityResult(
            filename=name,
            edge_coverage=edge_coverage,
            content_similarity=content_sim,
            structure_similarity=structure_sim,
            matched_edges=matched_edges,
            gt_edges=gt_edges,
            llm_edges=llm_edges,
            nested_results=nested_results if nested_results else None,
            llm_similarity=None  # 将在process_file中设置
        )

class CFGEvaluator:
    def __init__(self, llm_cfg_dir: str, static_cfg_dir: str, result_file: str):
        """初始化评估器
        
        Args:
            llm_cfg_dir: LLM生成的CFG文件目录
            static_cfg_dir: 静态分析生成的CFG文件目录
            result_file: 结果保存文件路径
        """
        self.llm_cfg_dir = Path(llm_cfg_dir)
        self.static_cfg_dir = Path(static_cfg_dir)
        self.result_file = Path(result_file)
        self.comparator = CFGComparator()
        self.results = []  # 存储所有结果
    
    def process_file(self, llm_cfg_path: Path) -> Optional[CFGSimilarityResult]:
        """处理单个CFG文件对的比较
        
        Args:
            llm_cfg_path: LLM生成的CFG文件路径
            
        Returns:
            CFGSimilarityResult 或 None（如果没有对应的静态CFG文件）
        """
        # 获取对应的静态CFG文件路径
        static_cfg_path = self.static_cfg_dir / llm_cfg_path.name
        if not static_cfg_path.exists():
            return None
            
        # 读取CFG文件
        with open(llm_cfg_path) as f:
            llm_cfg = json.load(f)
        with open(static_cfg_path) as f:
            static_cfg = json.load(f)
            
        # 比较CFG
        result = self.comparator.compare_cfgs(llm_cfg, static_cfg, llm_cfg_path.name)
        llm_sim = compare_cfg_similarity(llm_cfg, static_cfg)
        result.llm_similarity = llm_sim
        
        # 将结果添加到列表并保存
        self.results.append(result)
        self.save_results()
        
        return result
    
    def save_results(self):
        """保存当前所有结果到文件"""
        with open(self.result_file, "w") as f:
            json.dump(
                [self._result_to_dict(r) for r in self.results],
                f,
                indent=2
            )
    
    def evaluate_all(self) -> List[CFGSimilarityResult]:
        """评估所有CFG文件对
        
        Returns:
            所有比较结果的列表
        """
        # 处理每个LLM生成的CFG文件
        llm_cfg_paths = list(self.llm_cfg_dir.glob("*.json"))
        
        # 使用多线程并行处理
        with ThreadPoolExecutor() as executor:
            futures = []
            for llm_cfg_path in llm_cfg_paths:
                future = executor.submit(self.process_file, llm_cfg_path)
                futures.append(future)
            
            # 使用tqdm显示进度
            for future in tqdm(as_completed(futures), total=len(futures), desc="处理CFG文件"):
                future.result()
                
        return self.results
    
    @staticmethod
    def _result_to_dict(result: CFGSimilarityResult) -> Dict:
        """将CFGSimilarityResult转换为可JSON序列化的字典"""
        return {
            "filename": result.filename,
            "edge_coverage": result.edge_coverage,
            "content_similarity": result.content_similarity,
            "structure_similarity": result.structure_similarity,
            "matched_edges": result.matched_edges,
            "gt_edges": result.gt_edges,
            "llm_edges": result.llm_edges,
            "nested_results": {
                k: CFGEvaluator._result_to_dict(v)
                for k, v in result.nested_results.items()
            } if result.nested_results else None,
            "llm_similarity": result.llm_similarity
        }

def calculate_aggregate_metrics(results: List[CFGSimilarityResult]) -> Dict:
    """计算聚合指标
    
    Args:
        results: CFGSimilarityResult列表
        
    Returns:
        包含聚合指标的字典
    """
    metrics = {
        "total_cfgs_compared": len(results),
        "average_edge_coverage": np.mean([r.edge_coverage for r in results]),
        "average_content_similarity": np.mean([r.content_similarity for r in results]),
        "average_structure_similarity": np.mean([r.structure_similarity for r in results]),
        "total_gt_edges": sum(r.gt_edges for r in results),
        "total_llm_edges": sum(r.llm_edges for r in results),
        "total_matched_edges": sum(r.matched_edges for r in results)
    }
    return metrics

def main():
    evaluator = CFGEvaluator(
        llm_cfg_dir="merged_llm_cfg_50",
        static_cfg_dir="static_cfg",
        result_file="evaluation_results_50.json"
    )
    
    # 评估所有CFG
    results = evaluator.evaluate_all()
    
    # 计算统计指标
    metrics = calculate_aggregate_metrics(results)
    
    # 计算LLM评估的平均值
    llm_metrics = {
        "average_llm_structure_similarity": np.mean([
            r.llm_similarity["structure_similarity"] 
            for r in results if r.llm_similarity
        ]),
        "average_llm_content_similarity": np.mean([
            r.llm_similarity["content_similarity"] 
            for r in results if r.llm_similarity
        ]),
        "average_llm_total_similarity": np.mean([
            r.llm_similarity["total_similarity"] 
            for r in results if r.llm_similarity
        ]),
        "reasonable_percentage": np.mean([
            float(r.llm_similarity["reasonable"]) 
            for r in results if r.llm_similarity
        ]) * 100
    }
    
    # 输出评估结果
    print("\nAutomatic Evaluation Summary:")
    print(f"Total CFGs compared: {metrics['total_cfgs_compared']}")
    print(f"Average Edge Coverage: {metrics['average_edge_coverage']:.2f}")
    print(f"Average Content Similarity: {metrics['average_content_similarity']:.2f}")
    print(f"Average Structure Similarity: {metrics['average_structure_similarity']:.2f}")
    
    print("\nLLM Evaluation Summary:")
    print(f"Average Structure Similarity: {llm_metrics['average_llm_structure_similarity']:.2f}")
    print(f"Average Content Similarity: {llm_metrics['average_llm_content_similarity']:.2f}")
    print(f"Average Total Similarity: {llm_metrics['average_llm_total_similarity']:.2f}")
    print(f"Reasonable Percentage: {llm_metrics['reasonable_percentage']:.1f}%")
    
    # 保存完整的评估指标
    metrics.update(llm_metrics)
    with open("evaluation_metrics_50.json", "w") as f:
        json.dump(metrics, f, indent=2)

if __name__ == "__main__":
    main()


处理CFG文件: 100%|██████████| 48/48 [00:18<00:00,  2.60it/s]


Automatic Evaluation Summary:
Total CFGs compared: 48
Average Edge Coverage: 0.52
Average Content Similarity: 0.00
Average Structure Similarity: 0.83

LLM Evaluation Summary:
Average Structure Similarity: 0.84
Average Content Similarity: 0.86
Average Total Similarity: 0.85
Reasonable Percentage: 77.1%



