In [31]:
from dataclasses import dataclass
from typing import List, Dict, Optional
import json
from pathlib import Path

@dataclass
class CFGNode:
    """A single node of a control-flow graph (CFG)."""
    id: str  # node identifier; CFGEdge.from_node / to_node refer to nodes by this value
    code: str  # source text carried by this node
    order: int  # ordering key used to keep nodes in a stable order when sorting

@dataclass
class CFGEdge:
    """A directed edge between two CFG nodes, identified by their node ids."""
    from_node: str  # id of the node the edge leaves
    to_node: str  # id of the node the edge enters

@dataclass
class CFGData:
    """A control-flow graph: its nodes plus the directed edges between them."""
    nodes: List[CFGNode]  # all nodes of the graph
    edges: List[CFGEdge]  # directed edges; endpoints are CFGNode.id values

@dataclass
class CodeBlock:
    """A named block of source code with nested child blocks and an optional CFG."""
    decl_name: str  # name of the declaration this block represents
    start_line: int  # first source line of the block (presumably 1-based -- confirm with producer)
    end_line: int  # last source line of the block (presumably inclusive -- confirm with producer)
    children: List['CodeBlock']  # nested blocks, parsed recursively
    code: str  # source text of the block
    cfg: Optional[CFGData] = None  # control-flow graph of the block, if one was supplied

def parse_json_to_cfg(json_data: dict) -> CodeBlock:
    """Build a CodeBlock tree from one decoded JSON object.

    Args:
        json_data: Mapping with the required keys ``decl_name``,
            ``start_line``, ``end_line`` and ``code``. The optional key
            ``cfg`` holds a mapping with ``nodes`` (items with ``id`` and
            ``code``) and ``edges`` (items with ``from`` and ``to``); the
            optional key ``children`` holds a list of nested block mappings.
            A missing or ``null`` value for either optional key is treated
            as "not present".

    Returns:
        The CodeBlock for ``json_data`` with all children parsed recursively.

    Raises:
        KeyError: If a required key is missing from the block, from a CFG
            node/edge entry, or if a present ``cfg`` lacks ``nodes``/``edges``.
    """
    cfg = None
    # ``.get`` + ``is not None`` also covers an explicit JSON ``null``, which a
    # plain ``'cfg' in json_data`` membership test would let through.
    cfg_data = json_data.get('cfg')
    if cfg_data is not None:
        # The position in the input list becomes the node's ordering key.
        nodes = [
            CFGNode(id=node['id'], code=node['code'], order=idx)
            for idx, node in enumerate(cfg_data['nodes'])
        ]
        edges = [
            CFGEdge(from_node=edge['from'], to_node=edge['to'])
            for edge in cfg_data['edges']
        ]
        cfg = CFGData(nodes=nodes, edges=edges)

    # ``or []`` guards against ``"children": null``.
    children = [
        parse_json_to_cfg(child)
        for child in (json_data.get('children') or [])
    ]

    return CodeBlock(
        decl_name=json_data['decl_name'],
        start_line=json_data['start_line'],
        end_line=json_data['end_line'],
        code=json_data['code'],
        children=children,
        cfg=cfg,
    )

def can_merge_global_nodes(node1: CFGNode, node2: CFGNode, edges: List[CFGEdge]) -> bool:
    """Decide whether two global-scope nodes may be merged into one.

    Both nodes must have an id starting with ``"GlobalBlock_"`` and every
    non-blank line of their code must look like a simple statement: an
    ``import``/``from`` line, a comment, or a line containing ``=`` that is
    not the header of a compound statement (``if``, ``while``, ``for`` ...).

    This is a line-based heuristic, not a parser: continuation lines of a
    multi-line statement that contain no ``=`` make the node non-mergeable.

    Args:
        node1: First candidate node.
        node2: Second candidate node.
        edges: Edges of the graph. Currently unused; kept so the signature
            stays compatible with existing callers.

    Returns:
        True if both nodes are global nodes consisting only of simple
        statements, False otherwise.
    """
    if not (node1.id.startswith("GlobalBlock_") and node2.id.startswith("GlobalBlock_")):
        return False

    # Keywords that open a compound statement. A line such as ``if x == 1:``
    # contains '=' but is control flow, so it must not count as an assignment.
    compound_keywords = (
        'if', 'elif', 'else', 'for', 'while', 'try', 'except',
        'finally', 'with', 'def', 'class', 'async',
    )

    def starts_compound_statement(line: str) -> bool:
        for keyword in compound_keywords:
            if line.startswith(keyword):
                # Require a word boundary so identifiers such as ``iffy`` or
                # ``class_name`` are not mistaken for keywords.
                following = line[len(keyword):len(keyword) + 1]
                if not (following.isalnum() or following == '_'):
                    return True
        return False

    def is_simple_statement(code: str) -> bool:
        for raw_line in code.split('\n'):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            if line.startswith(('from ', 'import ')):
                continue
            if '=' in line and not starts_compound_statement(line):
                continue
            return False
        return True

    return is_simple_statement(node1.code) and is_simple_statement(node2.code)

def merge_nodes(node1_id: str, node2_id: str, cfg_data: CFGData) -> CFGData:
    """Merge node ``node2_id`` into node ``node1_id`` and return a new CFG.

    The merged node keeps ``node1_id`` as its id, carries the code of node1
    followed by the code of node2 (joined by a newline) and takes the smaller
    of the two ``order`` values. The input ``cfg_data`` is not modified.

    Edge handling:
      * ``node1 -> node2`` edges become internal to the merged node and are
        dropped.
      * Every other edge endpoint that referred to ``node2`` is redirected to
        the merged node, so edges from third-party nodes into ``node2`` are
        preserved and no edge is left pointing at the removed node.
      * Edges that end up identical (same endpoints) are emitted once, in
        order of first appearance.

    Args:
        node1_id: Id of the node that survives the merge.
        node2_id: Id of the node that is absorbed.
        cfg_data: Graph containing both nodes.

    Returns:
        A new CFGData with the merged node list (sorted by ``order``) and the
        rewritten edge list. If both ids are equal, ``cfg_data`` is returned
        unchanged.

    Raises:
        KeyError: If either id does not belong to a node of ``cfg_data``.
    """
    nodes_by_id = {node.id: node for node in cfg_data.nodes}
    node1 = nodes_by_id[node1_id]
    node2 = nodes_by_id[node2_id]

    # Merging a node with itself would only duplicate its code.
    if node1_id == node2_id:
        return cfg_data

    merged_node = CFGNode(
        id=node1_id,
        code=f"{node1.code}\n{node2.code}",
        order=min(node1.order, node2.order),
    )

    # Replace both originals by the merged node and restore ordering.
    new_nodes = [node for node in cfg_data.nodes if node.id not in {node1_id, node2_id}]
    new_nodes.append(merged_node)
    new_nodes.sort(key=lambda x: x.order)

    new_edges = []
    seen = set()
    for edge in cfg_data.edges:
        # The connecting edge is swallowed by the merged node.
        if edge.from_node == node1_id and edge.to_node == node2_id:
            continue
        source = node1_id if edge.from_node == node2_id else edge.from_node
        target = node1_id if edge.to_node == node2_id else edge.to_node
        if (source, target) in seen:
            continue
        seen.add((source, target))
        if source == edge.from_node and target == edge.to_node:
            new_edges.append(edge)  # untouched edge: reuse the original object
        else:
            new_edges.append(CFGEdge(from_node=source, to_node=target))

    return CFGData(nodes=new_nodes, edges=new_edges)

def can_merge_nodes(from_node_id: str, to_node_id: str, edges: List[CFGEdge], nodes_dict: Dict[str, CFGNode]) -> bool:
    """Decide whether ``to_node`` can be folded into ``from_node``.

    All of the following must hold:
      1. ``from_node`` and ``to_node`` are different nodes. A self-loop is
         never mergeable; accepting it would make the caller "merge" a node
         with itself over and over.
      2. There is a direct edge ``from_node -> to_node``.
      3. ``to_node`` has no more than one incoming edge.
      4. ``from_node`` has no more than one outgoing edge.

    Args:
        from_node_id: Id of the edge's source node.
        to_node_id: Id of the edge's target node.
        edges: All edges of the graph.
        nodes_dict: Mapping from node id to node. Currently unused; kept so
            the signature stays compatible with existing callers.

    Returns:
        True if the two nodes form a straight-line pair that can be merged.
    """
    if from_node_id == to_node_id:
        return False

    directly_connected = False
    incoming_to_target = 0
    outgoing_from_source = 0
    # Single pass: gather the connection flag and both degree counts together.
    for edge in edges:
        leaves_source = edge.from_node == from_node_id
        enters_target = edge.to_node == to_node_id
        if leaves_source:
            outgoing_from_source += 1
        if enters_target:
            incoming_to_target += 1
        if leaves_source and enters_target:
            directly_connected = True

    return (
        directly_connected
        and incoming_to_target <= 1
        and outgoing_from_source <= 1
    )

def optimize_cfg(cfg_data: CFGData) -> CFGData:
    """Simplify a CFG by repeatedly merging nodes until nothing changes.

    Each round performs at most one merge and then starts over:
      1. Nodes are sorted by ``order``; the first adjacent pair accepted by
         ``can_merge_global_nodes`` is merged.
      2. If no global pair qualified, edges are visited sorted by the
         ``order`` of their endpoints and the first edge accepted by
         ``can_merge_nodes`` has its two nodes merged.

    Self-loop edges and edges that reference unknown node ids are never
    offered for merging: a self-loop cannot shrink the graph (it would make
    the loop spin forever), and an unknown endpoint cannot be looked up.

    Args:
        cfg_data: Graph to optimize. It is not modified; merges produce new
            CFGData objects.

    Returns:
        The optimized graph, or ``cfg_data`` itself if it is falsy or has at
        most one node.
    """
    if not cfg_data or len(cfg_data.nodes) <= 1:
        return cfg_data

    changed = True
    while changed:
        changed = False
        nodes = sorted(cfg_data.nodes, key=lambda x: x.order)

        # First try to merge neighbouring global-scope nodes.
        for node1, node2 in zip(nodes, nodes[1:]):
            if can_merge_global_nodes(node1, node2, cfg_data.edges):
                cfg_data = merge_nodes(node1.id, node2.id, cfg_data)
                changed = True
                break
        if changed:
            continue

        # Otherwise fall back to merging along straight-line edges.
        nodes_dict = {node.id: node for node in cfg_data.nodes}
        candidates = [
            edge for edge in cfg_data.edges
            if edge.from_node != edge.to_node
            and edge.from_node in nodes_dict
            and edge.to_node in nodes_dict
        ]
        candidates.sort(
            key=lambda e: (nodes_dict[e.from_node].order, nodes_dict[e.to_node].order)
        )
        for edge in candidates:
            if can_merge_nodes(edge.from_node, edge.to_node, cfg_data.edges, nodes_dict):
                cfg_data = merge_nodes(edge.from_node, edge.to_node, cfg_data)
                changed = True
                break

    return cfg_data

def optimize_code_block(block: CodeBlock) -> CodeBlock:
    """Optimize the CFG of ``block`` and of all its descendants in place.

    Args:
        block: Block to process. Its ``cfg`` (when truthy) is replaced by the
            optimized graph and its ``children`` list is replaced by a new
            list of the recursively processed children.

    Returns:
        The same ``block`` object that was passed in.
    """
    graph = block.cfg
    if graph:
        block.cfg = optimize_cfg(graph)

    # Walk the nested blocks depth-first.
    processed_children = []
    for nested in block.children:
        processed_children.append(optimize_code_block(nested))
    block.children = processed_children
    return block

def parse_and_optimize_json_to_cfg(json_data: List[dict]) -> List[CodeBlock]:
    """解析JSON并优化所有代码块"""
    blocks = [parse_json_to_cfg(block_data) for block_data in json_data]
    return [optimize_code_block(block) for block in blocks]

def print_code_block_hierarchy(block: CodeBlock, indent: int = 0):
    """Print ``block`` and its descendants as an indented tree on stdout.

    For a block with a CFG, the node/edge counts, the nodes sorted by
    ``order`` (with their stripped code) and the edges are listed, followed
    by an empty separator line.

    Args:
        block: Block to print.
        indent: Nesting depth; each level indents by two spaces.
    """
    pad = "  " * indent

    def emit(text: str) -> None:
        # Every line of this block shares the same left padding.
        print(f"{pad}{text}")

    emit(f"Block: {block.decl_name} (lines {block.start_line}-{block.end_line})")

    graph = block.cfg
    if graph:
        emit(f"CFG Nodes: {len(graph.nodes)}")
        emit(f"CFG Edges: {len(graph.edges)}")
        emit("Nodes (in order):")
        for cfg_node in sorted(graph.nodes, key=lambda n: n.order):
            emit(f"  Node {cfg_node.id} (order {cfg_node.order}):")
            emit(f"    {cfg_node.code.strip()}")

        # Edge list
        emit("Edges:")
        for cfg_edge in graph.edges:
            emit(f"  {cfg_edge.from_node} -> {cfg_edge.to_node}")
        print()  # blank separator line

    child_depth = indent + 1
    for nested in block.children:
        print_code_block_hierarchy(nested, child_depth)

# Load the CFG description from disk (path is relative to the working directory).
llm_cfg_path = Path("../llm/1.json")
with llm_cfg_path.open('r', encoding='utf-8') as f:
    llm_cfg = json.load(f)

# Parse every top-level block and optimize its CFG.
optimized_blocks = parse_and_optimize_json_to_cfg(llm_cfg)

# Print the resulting hierarchy, one top-level block after another.
print("\nOptimized Code Block Hierarchy:")
for block in optimized_blocks:
    print_code_block_hierarchy(block, 0)



Optimized Code Block Hierarchy:
Block: GlobalBlock (lines 1-31)
CFG Nodes: 1
CFG Edges: 0
Nodes (in order):
  Node GlobalBlock_1 (order 0):
    from enum import Enum
from typing import Any, Dict, Literal, Optional


from pydantic import SecretStr
from requests.exceptions import RequestException


from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
from backend.data.model import (
    APIKeyCredentials,
    CredentialsField,
    CredentialsMetaInput,
    SchemaField,
)

from backend.integrations.providers import ProviderName
from backend.util.request import requests


TEST_CREDENTIALS = APIKeyCredentials(
    id="01234567-89ab-cdef-0123-456789abcdef",
    provider="ideogram",
    api_key=SecretStr("mock-ideogram-api-key"),
    title="Mock Ideogram API key",
    expires_at=None,
)

TEST_CREDENTIALS_INPUT = {
    "provider": TEST_CREDENTIALS.provider,
    "id": TEST_CREDENTIALS.id,
    "type": TEST_CREDENTIALS.type,
    "title": TEST_CREDENTIALS.type,
}
Edges:

