In [1]:
import torch
import numpy as np
import pandapower.networks as pn
from torch_geometric.data import Data
import os
from tqdm import tqdm

# Make sure the output directory exists.
# exist_ok=True makes this idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
processed_dir = "../data/processed"
os.makedirs(processed_dir, exist_ok=True)

In [2]:
def get_ieee33_topology(in_service_only=False):
    """Build graph topology tensors from pandapower's ``case33bw`` network.

    Each row of the line table becomes two directed edges (a->b and b->a) so
    that message passing treats the grid as an undirected graph. Edge features
    are the total line resistance and reactance (per-km value times length),
    z-score standardised per column.

    Args:
        in_service_only: When True, rows of ``net.line`` whose ``in_service``
            flag is False are dropped before building the graph. Defaults to
            False, which keeps every row (the original behaviour).
            NOTE(review): the recorded run yields 74 directed edges
            (2 x 37 lines), which suggests the normally-open tie lines of the
            33-bus case are included by default -- confirm whether that is
            intended for the downstream model.

    Returns:
        tuple: ``(edge_index, edge_attr)`` where ``edge_index`` is a
        ``torch.long`` tensor of shape [2, 2*L] and ``edge_attr`` is a
        ``torch.float32`` tensor of shape [2*L, 2] holding standardised
        (R, X), with L the number of lines used.
    """
    net = pn.case33bw()

    lines = net.line
    if in_service_only:
        lines = lines[lines.in_service.astype(bool)]

    # 1. Edge index (connectivity): the line table stores from_bus / to_bus.
    from_nodes = lines.from_bus.to_numpy()
    to_nodes = lines.to_bus.to_numpy()

    # Undirected graph: represent A-B as both A->B and B->A.
    src = np.concatenate([from_nodes, to_nodes])
    dst = np.concatenate([to_nodes, from_nodes])
    # Stack into a single ndarray first: calling torch.tensor on a Python list
    # of ndarrays is slow and emits a UserWarning.
    edge_index = torch.tensor(np.stack([src, dst]), dtype=torch.long)

    # 2. Edge attributes: total line impedance components R and X.
    # Unit conversion: ohm/km * km -> ohm.
    r = lines.r_ohm_per_km.to_numpy() * lines.length_km.to_numpy()
    x = lines.x_ohm_per_km.to_numpy() * lines.length_km.to_numpy()

    # Duplicate in the same order as the edge list (forward, then reverse).
    r_all = np.concatenate([r, r])
    x_all = np.concatenate([x, x])

    # Per-column z-score standardisation; the small epsilon guards against a
    # zero standard deviation.
    edge_attr = np.stack([r_all, x_all], axis=1)
    edge_attr = (edge_attr - edge_attr.mean(axis=0)) / (edge_attr.std(axis=0) + 1e-6)
    edge_attr = torch.tensor(edge_attr, dtype=torch.float32)

    return edge_index, edge_attr

# Module-level topology tensors; process_data() reads these as globals.
edge_index, edge_attr = get_ieee33_topology()
print(
    f"拓扑构建完成: Edge Index {edge_index.shape}, Edge Attr {edge_attr.shape}"
)

  edge_index = torch.tensor([src, dst], dtype=torch.long)


拓扑构建完成: Edge Index torch.Size([2, 74]), Edge Attr torch.Size([74, 2])


In [3]:
def process_data(raw_dir="../data/raw"):
    """Wrap the raw power-flow samples into graph ``Data`` objects and save them.

    Loads ``voltage_magnitude.npy``, ``active_power.npy`` and
    ``reactive_power.npy`` from ``raw_dir``, normalises them with global
    statistics, builds one ``Data`` object per sample and writes both the
    sample list and the statistics into the module-level ``processed_dir``.
    The module-level ``edge_index`` / ``edge_attr`` tensors are attached to
    every sample.

    Args:
        raw_dir: Directory containing the three ``.npy`` files. Each array is
            expected to be [num_samples, num_buses] (the recorded run shows
            V with shape (5000, 33)).

    Returns:
        dict: Global statistics as plain Python floats with keys ``v_min``,
        ``v_max``, ``p_mean``, ``p_std``, ``q_mean``, ``q_std``. They are
        needed later to map normalised values back to physical units.

    Raises:
        ValueError: If the three arrays differ in shape or are empty.
    """
    # 1. Load the raw arrays produced by the earlier generation step.
    V = np.load(os.path.join(raw_dir, "voltage_magnitude.npy"))
    P = np.load(os.path.join(raw_dir, "active_power.npy"))
    Q = np.load(os.path.join(raw_dir, "reactive_power.npy"))

    if not (V.shape == P.shape == Q.shape):
        raise ValueError(
            f"V, P and Q must share one shape, got {V.shape}, {P.shape}, {Q.shape}"
        )
    if V.size == 0:
        raise ValueError("raw arrays are empty; nothing to process")

    print(f"原始数据加载: V shape={V.shape}")

    # 2. Global statistics used for normalisation and later de-normalisation.
    # Stored as built-in floats so the saved stats file does not depend on
    # numpy scalar pickling when it is loaded back.
    stats = {
        'v_min': float(V.min()), 'v_max': float(V.max()),
        'p_mean': float(P.mean()), 'p_std': float(P.std()),
        'q_mean': float(Q.mean()), 'q_std': float(Q.std())
    }

    # Guard the min-max scaling against a constant voltage array, which would
    # otherwise divide by zero and fill the targets with NaN. With a span of 1
    # every value maps to -1, which de-normalises back to v_min correctly.
    v_span = stats['v_max'] - stats['v_min']
    if v_span == 0:
        v_span = 1.0

    # Normalise whole arrays once instead of once per sample.
    # Condition features: z-scored P and Q.
    p_norm = (P - stats['p_mean']) / (stats['p_std'] + 1e-6)
    q_norm = (Q - stats['q_mean']) / (stats['q_std'] + 1e-6)
    features = np.stack([p_norm, q_norm], axis=-1)  # [..., 2]: (P, Q) per bus
    # Target: voltage magnitude min-max scaled to [-1, 1].
    v_norm = 2 * (V - stats['v_min']) / v_span - 1

    data_list = []

    print("开始封装图数据...")
    for i in tqdm(range(len(V))):
        # torch.tensor copies, so each sample owns its own storage.
        x = torch.tensor(features[i], dtype=torch.float32)
        y = torch.tensor(v_norm[i], dtype=torch.float32).view(-1, 1)

        # All samples share the same topology tensors.
        data = Data(x=x, y=y, edge_index=edge_index, edge_attr=edge_attr)
        data_list.append(data)

    # 3. Persist the samples and the statistics.
    # NOTE(review): the sample list holds arbitrary Python objects; loading it
    # with torch.load on recent PyTorch versions may require
    # weights_only=False -- verify in the consuming notebook.
    torch.save(data_list, os.path.join(processed_dir, "ieee33_graph_data.pt"))
    torch.save(stats, os.path.join(processed_dir, "data_stats.pt"))

    return stats

# Run the preprocessing and print a short report of the global statistics.
stats = process_data()

print("\n=== 数据处理报告 ===")
print(f"V范围 (原始): [{stats['v_min']:.4f}, {stats['v_max']:.4f}]")
# p_std is a standard deviation, so the label says "标准差" rather than "方差" (variance).
print(f"P均值/标准差: {stats['p_mean']:.4f} / {stats['p_std']:.4f}")
print("处理完成。")

原始数据加载: V shape=(5000, 33)
开始封装图数据...


100%|██████████| 5000/5000 [00:00<00:00, 21739.20it/s]



=== 数据处理报告 ===
V范围 (原始): [0.8467, 1.1097]
P均值/方差: -0.0392 / 0.2586
处理完成。
