In [21]:
from scalpel.cfg import CFGBuilder

# Read the source of the module under analysis and build its control-flow
# graph with Scalpel. The file is opened in a ``with`` block so the handle is
# closed deterministically instead of being left to the garbage collector.
file_path = "source_code/1.py"
with open(file_path, 'r', encoding='utf-8') as source_file:
    src = source_file.read()
cfg = CFGBuilder().build_from_src("1", src)
print(cfg)

CFG for 1


In [44]:
from typing import Optional, List, Dict
from dataclasses import dataclass, field
import ast
import json

@dataclass
class Link:
    """Control-flow edge: a jump from one basic block to another."""
    source: str  # ID of the block the edge leaves
    target: str  # ID of the block the edge enters
    # Jump condition, kept as a string so the edge stays JSON-serializable.
    condition: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the edge as a plain dict (source, target, condition)."""
        return dict(
            source=self.source,
            target=self.target,
            condition=self.condition,
        )

@dataclass
class Block:
    """Basic block: a run of statements that execute sequentially."""
    id: str
    statements: List[str] = field(default_factory=list)  # string form of each statement
    exits: List['Link'] = field(default_factory=list)  # outgoing control-flow edges
    predecessors: List[str] = field(default_factory=list)  # IDs of predecessor blocks

    def to_dict(self) -> dict:
        """Return a JSON-serializable snapshot of the block.

        The ``statements`` and ``predecessors`` lists are copied, so mutating
        the returned dict never changes the block itself. Each exit is
        serialized through its own ``to_dict()``.
        """
        return {
            'id': self.id,
            'statements': list(self.statements),
            # ``link`` rather than ``exit`` to avoid shadowing the builtin.
            'exits': [link.to_dict() for link in self.exits],
            'predecessors': list(self.predecessors),
        }

@dataclass
class CFG:
    """Control-flow graph of a single function, class or module."""
    name: str  # function / class / module name
    blocks: Dict[str, 'Block'] = field(default_factory=dict)  # block ID -> Block
    entry_block: Optional[str] = None  # ID of the entry block
    exit_blocks: List[str] = field(default_factory=list)  # IDs of the exit blocks
    functioncfgs: Dict[str, 'CFG'] = field(default_factory=dict)  # CFGs of nested functions
    class_cfgs: Dict[str, 'CFG'] = field(default_factory=dict)  # CFGs of nested classes

    def add_block(self, block: 'Block') -> None:
        """Register *block* under its ID, replacing any block with that ID."""
        self.blocks[block.id] = block

    def add_link(self, source_id: str, target_id: str, condition: Optional[str] = None) -> None:
        """Add a control-flow edge between two blocks already in the graph.

        Nothing happens when either ID is unknown. The edge is appended to
        the source block's exits, and the source ID is recorded once in the
        target block's predecessors.
        """
        if source_id not in self.blocks or target_id not in self.blocks:
            return
        self.blocks[source_id].exits.append(Link(source_id, target_id, condition))
        predecessors = self.blocks[target_id].predecessors
        if source_id not in predecessors:
            predecessors.append(source_id)

    def to_dict(self) -> dict:
        """Return the graph, including nested graphs, as a JSON-serializable dict.

        ``exit_blocks`` is copied so that mutating the result does not alter
        this graph.
        """
        return {
            'name': self.name,
            'blocks': {bid: block.to_dict() for bid, block in self.blocks.items()},
            'entry_block': self.entry_block,
            'exit_blocks': list(self.exit_blocks),
            'functioncfgs': {name: cfg.to_dict() for name, cfg in self.functioncfgs.items()},
            'class_cfgs': {name: cfg.to_dict() for name, cfg in self.class_cfgs.items()},
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'CFG':
        """Rebuild a graph from the dict layout produced by ``to_dict``.

        Lists are copied out of *data*, so the new graph shares no mutable
        state with the input. Nested graphs are rebuilt with ``cls``, so a
        subclass round-trips as that subclass.

        Raises:
            KeyError: if a required key is missing from *data*.
        """
        cfg = cls(name=data['name'])
        cfg.entry_block = data['entry_block']
        cfg.exit_blocks = list(data['exit_blocks'])

        # Rebuild every basic block together with its outgoing edges.
        for bid, block_data in data['blocks'].items():
            cfg.blocks[bid] = Block(
                id=block_data['id'],
                statements=list(block_data['statements']),
                exits=[
                    Link(
                        source=exit_data['source'],
                        target=exit_data['target'],
                        condition=exit_data['condition'],
                    )
                    for exit_data in block_data['exits']
                ],
                predecessors=list(block_data['predecessors']),
            )

        # Recursively rebuild the nested function and class graphs.
        cfg.functioncfgs = {
            name: cls.from_dict(sub_data)
            for name, sub_data in data['functioncfgs'].items()
        }
        cfg.class_cfgs = {
            name: cls.from_dict(sub_data)
            for name, sub_data in data['class_cfgs'].items()
        }

        return cfg

def convert_scalpel_cfg(scalpel_cfg) -> CFG:
    """Convert a Scalpel CFG into the simplified, JSON-serializable CFG.

    Every block returned by ``scalpel_cfg.get_all_blocks()`` becomes a
    ``Block`` whose statements and exit conditions are stored as strings.
    The block equal to ``scalpel_cfg.entryblock`` becomes the entry block,
    and every block without exits is recorded as an exit block. Nested
    ``functioncfgs`` and ``class_cfgs`` are converted recursively when the
    attributes exist, with their keys stringified.
    """
    def ast_to_str(node) -> str:
        """Render an AST node as source text; fall back to ``str`` otherwise."""
        return ast.unparse(node) if isinstance(node, ast.AST) else str(node)

    def predecessor_id(pred) -> str:
        """Return the ID of the block a predecessor entry refers to."""
        # NOTE(review): Scalpel (like staticfg) appears to store incoming
        # Link objects in ``Block.predecessors``; ``str(link)`` would be a
        # description of the link, not a block ID. Use the link's source
        # block when there is one and fall back to ``str`` for plain values.
        source = getattr(pred, 'source', None)
        return str(source.id) if source is not None else str(pred)

    cfg = CFG(name=getattr(scalpel_cfg, 'name', ''))

    for scalpel_block in scalpel_cfg.get_all_blocks():
        block_id = str(scalpel_block.id)
        block = Block(
            id=block_id,
            statements=[ast_to_str(stmt) for stmt in scalpel_block.statements],
            # ``link`` rather than ``exit`` to avoid shadowing the builtin.
            exits=[
                Link(
                    source=block_id,
                    target=str(link.target.id),
                    condition=ast_to_str(link.exitcase) if link.exitcase else None,
                )
                for link in scalpel_block.exits
            ],
            predecessors=[predecessor_id(pred) for pred in scalpel_block.predecessors],
        )
        cfg.add_block(block)

        # Record the entry block and any block with no outgoing edge.
        if scalpel_block == scalpel_cfg.entryblock:
            cfg.entry_block = block.id
        if not block.exits:
            cfg.exit_blocks.append(block.id)

    # Recursively convert the nested function and class graphs.
    if hasattr(scalpel_cfg, 'functioncfgs'):
        for func_name, func_cfg in scalpel_cfg.functioncfgs.items():
            cfg.functioncfgs[str(func_name)] = convert_scalpel_cfg(func_cfg)

    if hasattr(scalpel_cfg, 'class_cfgs'):
        for class_name, class_cfg in scalpel_cfg.class_cfgs.items():
            cfg.class_cfgs[str(class_name)] = convert_scalpel_cfg(class_cfg)

    return cfg

def save_cfg_to_json(cfg: CFG, filename: str) -> None:
    """Serialize *cfg* via ``to_dict`` and write it to *filename* as UTF-8 JSON.

    The output is indented by two spaces and non-ASCII characters are
    written as-is rather than escaped.
    """
    with open(filename, mode='w', encoding='utf-8') as out_file:
        payload = cfg.to_dict()
        json.dump(payload, out_file, ensure_ascii=False, indent=2)

def load_cfg_from_json(filename: str) -> CFG:
    """Read the UTF-8 JSON file *filename* and rebuild the CFG it describes."""
    with open(filename, encoding='utf-8') as in_file:
        return CFG.from_dict(json.load(in_file))


In [45]:
# Convert the Scalpel CFG held in `cfg` into the simplified CFG and write it
# to "output_cfg.json".
new_cfg = convert_scalpel_cfg(cfg)
save_cfg_to_json(new_cfg, "output_cfg.json")