# VISUALIZATION FOR TESTING

In [13]:
import pickle
import numpy as np
from collections import defaultdict, Counter

def _unique_values(values):
    """Return the distinct items of *values* as a list.

    Hashable items are deduplicated through a set. If any item is unhashable
    (for example a list or dict attribute), items are deduplicated by their
    ``repr`` instead, keeping the first occurrence of each.
    """
    try:
        return list(set(values))
    except TypeError:
        first_seen = {}
        for value in values:
            first_seen.setdefault(repr(value), value)
        return list(first_seen.values())


def _print_feature_stats(title, features):
    """Print one summary line per feature under the heading *title*.

    Args:
        title: Text placed between the ``===`` markers of the section header.
        features: Mapping of feature name -> list of collected values.

    A feature is classified by the type of its first value only: numeric
    features (Python or NumPy ints/floats, including bools) print min, max
    and mean; everything else prints the distinct-value count and up to
    three example values.
    """
    print(f"\n=== {title} ===")
    for feature, values in features.items():
        if not values:
            continue
        # NumPy integer/floating scalars are not all subclasses of int/float,
        # so they are listed explicitly to keep them out of the categorical path.
        if isinstance(values[0], (int, float, np.integer, np.floating)):
            arr = np.array(values)
            print(f"{feature}: [{arr.min():.2f}, {arr.max():.2f}], 均值={arr.mean():.2f}")
        else:
            unique_values = _unique_values(values)
            print(f"{feature}: {len(unique_values)} 种取值, 如 {unique_values[:3]}")


def feature_summary(pkl_path="models/data/route_graphs_filtered.pkl"):
    """Print a compact overview of every node and edge feature in the dataset.

    Args:
        pkl_path: Path to a pickle file holding a sequence of dicts, each with
            a ``'graph'`` entry exposing ``nodes(data=True)`` and
            ``edges(data=True)``.

    Returns:
        None. The summary is written to standard output.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(pkl_path, 'rb') as f:
        route_graphs = pickle.load(f)

    print(f"数据集: {len(route_graphs)} 个图")

    # Gather every attribute value across all graphs, keyed by attribute name.
    node_features = defaultdict(list)
    edge_features = defaultdict(list)

    for graph_data in route_graphs:
        G = graph_data['graph']

        for _node, attrs in G.nodes(data=True):
            for key, value in attrs.items():
                node_features[key].append(value)

        for _u, _v, attrs in G.edges(data=True):
            for key, value in attrs.items():
                edge_features[key].append(value)

    _print_feature_stats("节点特征", node_features)
    _print_feature_stats("边特征", edge_features)

# Run the summary using the default pickle path
feature_summary()

数据集: 1791 个图

=== 节点特征 ===
geometry: 13354 种取值, 如 [<LINESTRING (682438.827 3556899.204, 682448.178 3556850.541, 682460.965 3556...>, <LINESTRING (668757.166 3546908.346, 668782.789 3546902.134, 668784.711 3546...>, <LINESTRING (669044.964 3545666.128, 669097.262 3545647.064, 669109.626 3545...>]
length: [2.22, 65212.11], 均值=458.40
width: [6.00, 25.00], 均值=9.65
highway: 4 种取值, 如 ['street', 'secondary', 'internal']
height_mean: [0.00, 23.12], 均值=0.62
frontage_L_mean: [0.00, 150.00], 均值=1.08
transport_den: [0.00, 6.69], 均值=0.02
nvdi_mean: [0.05, 0.38], 均值=0.18
hop_level: [0.00, 1.00], 均值=0.69
is_center: [0.00, 1.00], 均值=0.06
poi_entropy: [0.00, 1.00], 均值=0.21
linearity: [0.00, 1.00], 均值=0.94
geom_vertex_count: [2.00, 289.00], 均值=9.77

=== 边特征 ===
intersection_coords: 10698 种取值, 如 [(1.3981266969951822, -1.0289125691328502), (-1.2771781990378985, -0.3436414677725141), (0.17819448987996506, 0.5361784967598396)]


In [14]:
import pickle
import numpy as np
from collections import defaultdict

def _describe_center_feature(feature, values):
    """Print the summary of a single feature collected from center nodes.

    Args:
        feature: Feature (attribute) name used as the printed label.
        values: Non-empty list of that attribute's values.

    The type of the first value decides the report: booleans print the share
    of truthy values, other numbers print range and mean ± std, and anything
    else prints the distinct-value count plus up to five values.
    """
    first = values[0]

    # bool must be tested before int: bool is a subclass of int, so a numeric
    # check placed first would swallow every boolean feature.
    if isinstance(first, (bool, np.bool_)):
        true_count = sum(1 for value in values if value)
        print(f"{feature}: {true_count}/{len(values)} = {true_count/len(values):.1%} 为True")
        return

    # NumPy integer/floating scalars are listed explicitly because they are
    # not all subclasses of the built-in int/float.
    if isinstance(first, (int, float, np.integer, np.floating)):
        arr = np.array(values)
        print(f"{feature}:")
        print(f"  范围: [{arr.min():.2f}, {arr.max():.2f}]")
        print(f"  均值: {arr.mean():.2f} ± {arr.std():.2f}")
        return

    try:
        unique_values = list(set(values))
    except TypeError:
        # Unhashable values (lists, dicts): deduplicate by repr instead.
        first_seen = {}
        for value in values:
            first_seen.setdefault(repr(value), value)
        unique_values = list(first_seen.values())

    print(f"{feature}: {len(unique_values)} 种取值")
    if len(unique_values) <= 5:
        print(f"  取值: {unique_values}")
    else:
        print(f"  示例: {unique_values[:5]}...")


def center_nodes_summary(pkl_path="models/data/route_graphs_filtered.pkl"):
    """Print an overview of the features of nodes whose ``is_center`` is truthy.

    Args:
        pkl_path: Path to a pickle file holding a sequence of dicts, each with
            a ``'graph'`` entry exposing ``nodes(data=True)``.

    Returns:
        None. Node counts, the center-node share and one summary per feature
        are written to standard output. With zero nodes the share is
        reported as 0.0% instead of raising ``ZeroDivisionError``.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(pkl_path, 'rb') as f:
        route_graphs = pickle.load(f)

    # Attribute values of center nodes only, keyed by attribute name.
    center_features = defaultdict(list)
    center_count = 0
    total_nodes = 0

    for graph_data in route_graphs:
        G = graph_data['graph']

        for _node, attrs in G.nodes(data=True):
            total_nodes += 1

            # Nodes without an 'is_center' attribute count as non-center.
            if not attrs.get('is_center', False):
                continue

            center_count += 1
            for key, value in attrs.items():
                center_features[key].append(value)

    center_ratio = center_count / total_nodes if total_nodes else 0.0

    print(f"数据集: {len(route_graphs)} 个图")
    print(f"总节点数: {total_nodes}")
    print(f"中心节点数: {center_count}")
    print(f"中心节点占比: {center_ratio:.1%}")

    print("\n=== 中心节点特征概览 ===")

    for feature, values in center_features.items():
        if not values:  # defensive: skip features with no collected values
            continue
        _describe_center_feature(feature, values)

# Run the center-node summary using the default pickle path
center_nodes_summary()

数据集: 1791 个图
总节点数: 30653
中心节点数: 1791
中心节点占比: 5.8%

=== 中心节点特征概览 ===
geometry: 1791 种取值
  示例: [<LINESTRING (680419.718 3553220.613, 680440.322 3553228.728, 680478.652 3553...>, <LINESTRING (652822.142 3548146.345, 652902.124 3548224.011, 652924.427 3548...>, <LINESTRING (664272.478 3564305.209, 664268.477 3564319.556, 664264.496 3564...>, <LINESTRING (681030.535 3535850.319, 681076.257 3535830.028)>, <LINESTRING (656258.661 3546396.249, 656312.442 3546392.523)>]...
length:
  范围: [30.14, 498.04]
  均值: 192.28 ± 123.19
width:
  范围: [6.00, 25.00]
  均值: 7.14 ± 2.47
highway: 1 种取值
  取值: ['street']
height_mean:
  范围: [0.01, 2.60]
  均值: 0.26 ± 0.28
frontage_L_mean:
  范围: [0.00, 20.07]
  均值: 0.50 ± 0.96
transport_den:
  范围: [0.00, 0.31]
  均值: 0.01 ± 0.02
nvdi_mean:
  范围: [0.05, 0.37]
  均值: 0.19 ± 0.06
hop_level:
  范围: [0.00, 0.00]
  均值: 0.00 ± 0.00
is_center:
  范围: [1.00, 1.00]
  均值: 1.00 ± 0.00
poi_entropy:
  范围: [-0.00, 0.97]
  均值: 0.17 ± 0.26
linearity:
  范围: [0.00, 1.00]
  均值: 0.94 ± 0.14
ge