In [8]:
import ast
import json
from pathlib import Path
from typing import Any, Dict, List, Optional, Set

class BasicBlock:
    """A node of the control-flow graph.

    A block holds a list of AST nodes plus up to three outgoing links:
    ``next`` (unconditional successor) and ``true_branch`` /
    ``false_branch`` (the two successors of a block that ends in a
    condition). Every link starts out as ``None`` and is filled in by
    whoever builds the graph.
    """

    def __init__(self, id: int):
        # NOTE: the parameter name shadows the builtin ``id``; it is kept
        # unchanged so existing positional/keyword callers keep working.
        self.id = id
        # AST nodes recorded in this block, in visiting order.
        self.statements: List[ast.AST] = []
        # Successor links; ``None`` means "no such edge".
        self.next: Optional['BasicBlock'] = None
        self.true_branch: Optional['BasicBlock'] = None
        self.false_branch: Optional['BasicBlock'] = None

    def __repr__(self) -> str:
        # Keep the repr short: successors are omitted because loops make
        # the graph cyclic.
        return f"BasicBlock(id={self.id!r}, statements={len(self.statements)})"

class CFGBuilder(ast.NodeVisitor):
    """Build a control-flow graph of basic blocks by walking a Python AST.

    ``if``, ``while`` and ``for`` statements split the flow into linked
    blocks; ``return``, ``break`` and ``continue`` terminate the current
    block. Any other statement is appended to the current block and its
    children are then visited, so statements nested inside compound
    statements without a dedicated visitor (e.g. ``def``, ``with``,
    ``try``) are recorded individually as well.

    Every block ever created is kept in ``self.blocks`` in creation order.
    """

    def __init__(self):
        # Block that newly visited statements are appended to. ``None``
        # means the previous block was terminated, so the next statement
        # opens a fresh block.
        self.current_block: 'Optional[BasicBlock]' = None
        self.blocks: 'List[BasicBlock]' = []
        self.block_counter = 0
        # One ``(header_block, exit_block)`` pair per enclosing loop; used
        # to resolve the targets of ``continue`` and ``break``.
        self._loop_stack: list = []

    def new_block(self) -> 'BasicBlock':
        """Create a block with the next sequential id and register it."""
        self.block_counter += 1
        block = BasicBlock(self.block_counter)
        self.blocks.append(block)
        return block

    def add_statement(self, stmt: ast.AST):
        """Append *stmt* to the current block, opening one if necessary."""
        if self.current_block is None:
            self.current_block = self.new_block()
        self.current_block.statements.append(stmt)

    def visit_If(self, node: ast.If):
        """Split the flow into a true branch, a false branch and a merge block."""
        # The current block ends with the condition of the ``if``.
        test_block = self.current_block or self.new_block()
        test_block.statements.append(node.test)

        # Successor taken when the condition holds.
        true_block = self.new_block()
        test_block.true_branch = true_block

        # Successor taken otherwise (stays empty when there is no ``else``).
        false_block = self.new_block()
        test_block.false_branch = false_block

        # Block where both branches meet again.
        merge_block = self.new_block()

        # True branch.
        self.current_block = true_block
        for stmt in node.body:
            self.visit(stmt)
        # ``current_block`` is None when the branch ended in a terminator
        # (return/break/continue); such a branch does not reach the merge.
        if self.current_block:
            self.current_block.next = merge_block

        # False branch.
        self.current_block = false_block
        for stmt in node.orelse:
            self.visit(stmt)
        if self.current_block:
            self.current_block.next = merge_block

        self.current_block = merge_block

    def _visit_loop(self, node, header_nodes):
        """Shared construction for ``while`` and ``for`` loops.

        *header_nodes* are the AST nodes stored in the loop header block
        (the condition of a ``while``; the target and iterable of a ``for``).
        """
        # Header block, entered from the code before the loop and from the
        # end of every iteration.
        test_block = self.new_block()
        if self.current_block:
            self.current_block.next = test_block
        test_block.statements.extend(header_nodes)

        # Loop body.
        body_block = self.new_block()
        test_block.true_branch = body_block

        # Code following the loop.
        exit_block = self.new_block()

        # A loop ``else`` clause runs when the loop ends normally and is
        # skipped by ``break``, so it sits between the header's false edge
        # and the exit block. It is created last so loops without ``else``
        # keep the header/body/exit numbering.
        else_block = self.new_block() if node.orelse else None
        test_block.false_branch = else_block if else_block is not None else exit_block

        self._loop_stack.append((test_block, exit_block))
        try:
            self.current_block = body_block
            for stmt in node.body:
                self.visit(stmt)
            # Back edge, unless the body ended in a terminator.
            if self.current_block:
                self.current_block.next = test_block
        finally:
            self._loop_stack.pop()

        if else_block is not None:
            self.current_block = else_block
            for stmt in node.orelse:
                self.visit(stmt)
            if self.current_block:
                self.current_block.next = exit_block

        self.current_block = exit_block

    def visit_While(self, node: ast.While):
        """Build header/body/exit blocks for a ``while`` loop."""
        self._visit_loop(node, [node.test])

    def visit_For(self, node: ast.For):
        """Build header/body/exit blocks for a ``for`` loop."""
        self._visit_loop(node, [node.target, node.iter])

    def visit_Break(self, node: ast.Break):
        """Record ``break`` and jump to the exit block of the innermost loop."""
        self.add_statement(node)
        # Outside any tracked loop the statement is only recorded.
        if self._loop_stack:
            self.current_block.next = self._loop_stack[-1][1]
            self.current_block = None

    def visit_Continue(self, node: ast.Continue):
        """Record ``continue`` and jump back to the innermost loop header."""
        self.add_statement(node)
        if self._loop_stack:
            self.current_block.next = self._loop_stack[-1][0]
            self.current_block = None

    def visit_Return(self, node: ast.Return):
        """Record ``return`` and terminate the current block without a successor."""
        self.add_statement(node)
        self.current_block = None

    def generic_visit(self, node: ast.AST):
        """Record any statement without a dedicated visitor, then visit its children."""
        if isinstance(node, ast.stmt):
            self.add_statement(node)
        super().generic_visit(node)

def ast_node_to_str(node: ast.AST) -> str:
    """Render an AST node as a short, human-readable string.

    Calls, names, constants, comparisons and binary operations get a custom
    rendering in which operators appear as their AST class names (``Add``,
    ``Lt``, ...). Every other node type is rendered with ``ast.unparse``.

    Args:
        node: The AST node to render.

    Returns:
        The textual form of *node*.
    """
    if isinstance(node, ast.Expr):
        # An expression statement is rendered as the expression it wraps.
        return ast_node_to_str(node.value)

    if isinstance(node, ast.Call):
        func = ast_node_to_str(node.func)
        rendered_args = [ast_node_to_str(arg) for arg in node.args]
        # Keyword arguments are part of the call too; ``arg`` is None for
        # a ``**mapping`` argument.
        for keyword in node.keywords:
            value = ast_node_to_str(keyword.value)
            if keyword.arg is None:
                rendered_args.append(f"**{value}")
            else:
                rendered_args.append(f"{keyword.arg}={value}")
        return f"{func}({', '.join(rendered_args)})"

    if isinstance(node, ast.Name):
        return node.id

    if isinstance(node, ast.Constant):
        return repr(node.value)

    if isinstance(node, ast.Compare):
        # A comparison may be chained (``a < b < c``): emit every
        # operator/comparator pair, not just the first one.
        parts = [ast_node_to_str(node.left)]
        for op, comparator in zip(node.ops, node.comparators):
            parts.append(type(op).__name__)
            parts.append(ast_node_to_str(comparator))
        return " ".join(parts)

    if isinstance(node, ast.BinOp):
        # Parenthesise nested binary operations so the grouping of the
        # original expression is not lost in the flat rendering.
        operands = []
        for child in (node.left, node.right):
            text = ast_node_to_str(child)
            operands.append(f"({text})" if isinstance(child, ast.BinOp) else text)
        return f"{operands[0]} {type(node.op).__name__} {operands[1]}"

    return ast.unparse(node)

def build_cfg_json(source_code: str) -> Dict[str, Any]:
    """Parse *source_code*, build its CFG and return it as a JSON-ready dict.

    The result has two keys: ``"nodes"`` (one entry per basic block with its
    id and rendered statements) and ``"edges"`` (one entry per outgoing link,
    typed ``"next"``, ``"true"`` or ``"false"``).
    """
    parsed = ast.parse(source_code)

    builder = CFGBuilder()
    builder.visit(parsed)

    # One node per basic block, in creation order.
    graph_nodes = [
        {
            "id": str(blk.id),
            "type": "block",
            "statements": [ast_node_to_str(item) for item in blk.statements],
        }
        for blk in builder.blocks
    ]

    # For each block, emit its existing links in the fixed order
    # next -> true -> false.
    graph_edges = []
    for blk in builder.blocks:
        links = (
            ("next", blk.next),
            ("true", blk.true_branch),
            ("false", blk.false_branch),
        )
        for label, successor in links:
            if successor:
                graph_edges.append({
                    "source": str(blk.id),
                    "target": str(successor.id),
                    "type": label,
                })

    return {"nodes": graph_nodes, "edges": graph_edges}

def process_python_file(file_path: Path):
    """Build the CFG of a single Python file, print it and save it as JSON.

    The JSON is written to ``cfg_output/<file stem>.json`` relative to the
    current working directory. Any exception is reported on stdout.

    Returns:
        True on success, False if any step raised an exception.
    """
    try:
        # Load the source text.
        with open(file_path, 'r', encoding='utf-8') as src:
            code_text = src.read()

        # Build the graph and echo it to stdout.
        graph = build_cfg_json(code_text)
        print(json.dumps(graph, indent=2))

        # Make sure the output directory exists.
        target_dir = Path('cfg_output')
        target_dir.mkdir(exist_ok=True)

        # Persist the graph next to the other results.
        destination = target_dir / f"{file_path.stem}.json"
        with open(destination, 'w', encoding='utf-8') as sink:
            json.dump(graph, sink, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"处理文件 {file_path} 时出错: {e!s}")
        return False
    else:
        return True

def process_directory(directory: str, only_name: Optional[str] = "1.py"):
    """Recursively process the Python files found under *directory*.

    Args:
        directory: Root directory to search for ``*.py`` files.
        only_name: If not None, only files with exactly this file name are
            processed. The default ``"1.py"`` keeps the previous hard-coded
            behaviour; pass ``None`` to process every Python file.

    Raises:
        ValueError: If *directory* does not exist.
    """
    directory = Path(directory)

    if not directory.exists():
        raise ValueError(f"目录不存在: {directory}")

    # Sort so files are handled in a reproducible order regardless of the
    # order in which the filesystem lists them.
    for file_path in sorted(directory.rglob("*.py")):
        if only_name is not None and file_path.name != only_name:
            continue
        print(f"处理文件: {file_path}")
        process_python_file(file_path)

if __name__ == "__main__":
    # Script entry point: run the CFG export over the task's source directory
    # (path is relative to the current working directory).
    data_dir = "./cfg_task/source_code"
    process_directory(data_dir)


处理文件: cfg_task/source_code/1.py
{
  "nodes": [
    {
      "id": "1",
      "type": "block",
      "statements": [
        "from enum import Enum",
        "from typing import Any, Dict, Literal, Optional",
        "from pydantic import SecretStr",
        "from requests.exceptions import RequestException",
        "from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema",
        "from backend.data.model import APIKeyCredentials, CredentialsField, CredentialsMetaInput, SchemaField",
        "from backend.integrations.providers import ProviderName",
        "from backend.util.request import requests",
        "TEST_CREDENTIALS = APIKeyCredentials(id='01234567-89ab-cdef-0123-456789abcdef', provider='ideogram', api_key=SecretStr('mock-ideogram-api-key'), title='Mock Ideogram API key', expires_at=None)",
        "TEST_CREDENTIALS_INPUT = {'provider': TEST_CREDENTIALS.provider, 'id': TEST_CREDENTIALS.id, 'type': TEST_CREDENTIALS.type, 'title': TEST_CREDENTIALS.type}",