# In [26]:
import json
from scalpel.call_graph.pycg import CallGraphGenerator, formats

def _strip_prefix(name: str, prefix: str) -> "str | None":
    """Return *name* relative to *prefix*, or ``None`` if it lies outside it.

    The bare prefix maps to ``"main"``; ``"<prefix>.<rest>"`` maps to
    ``"<rest>"``. The dot is part of the match, so ``"10.f"`` is not treated
    as belonging to prefix ``"1"``.
    """
    if name == prefix:
        return "main"
    dotted = prefix + "."
    if name.startswith(dotted):
        return name[len(dotted):]
    return None


def filter_call_graph(call_graph: dict, prefix: str) -> dict:
    """Restrict a call graph to the callers that live under *prefix*.

    Args:
        call_graph: Mapping of caller name to an iterable of callee names.
        prefix: Qualified name whose entries should be kept.

    Returns:
        A new dict with one entry per kept caller. Caller keys have the
        prefix stripped (the bare prefix becomes ``"main"``). Callees under
        the prefix are renamed the same way; callees outside the prefix are
        kept verbatim, but their own entries are not pulled into the result.

    Note:
        Both ``prefix`` and ``prefix + ".main"`` are renamed to ``"main"``.
        When both are present their callee lists are merged (first-seen
        order, no duplicates added) instead of the later entry silently
        overwriting the earlier one.
    """
    filtered = {}
    for caller, callees in call_graph.items():
        short_caller = _strip_prefix(caller, prefix)
        if short_caller is None:
            # Caller is outside the prefix: drop the whole entry.
            continue

        renamed = []
        for callee in callees:
            short_callee = _strip_prefix(callee, prefix)
            renamed.append(callee if short_callee is None else short_callee)

        if short_caller not in filtered:
            filtered[short_caller] = renamed
            continue

        # Renamed-key collision: keep every edge rather than overwriting.
        merged = filtered[short_caller]
        for callee in renamed:
            if callee not in merged:
                merged.append(callee)

    return filtered

import os
if __name__ == "__main__":
    # Input scripts are looked up as <source_code_dir>/<i>.py; each filtered
    # call graph is written to <static_cg_dir>/<i>.json.
    source_code_dir = "../../dataset/python"
    static_cg_dir = "../../dataset/python_cg"
    os.makedirs(static_cg_dir, exist_ok=True)

    for i in range(200):
        source_file = f"{source_code_dir}/{i}.py"
        if os.path.exists(source_file):
            # Analyze this single file. source_code_dir is passed as the
            # second argument (presumably the package root; confirm against
            # the Scalpel documentation).
            cg_generator = CallGraphGenerator([source_file], source_code_dir)
            cg_generator.analyze()
            formatter = formats.Simple(cg_generator)

            # Unfiltered call graph as produced by the formatter.
            original_graph = formatter.generate()

            # Keep only the entries whose name starts with this file's
            # number. Adjust this if the naming of the input files changes.
            prefix_to_keep = str(i)
            filtered_graph = filter_call_graph(original_graph, prefix_to_keep)

            # Persist the filtered graph next to its siblings.
            with open(f"{static_cg_dir}/{i}.json", "w", encoding="utf-8") as f:
                json.dump(filtered_graph, f, indent=4, ensure_ascii=False)
