In [25]:
import pandas as pd
import numpy as np
import torch
import os

In [26]:
# Set device
# NOTE(review): 'cuda:1' is hard-coded (second GPU); nothing in this part of
# the notebook moves data to it -- confirm it matches the target machine.
device = torch.device('cuda:1')
# Directories for the raw input data and for processed outputs
inputdir = '../data/'
precesseddir = '../data/processed/'
# exist_ok=True creates the directory only when it is missing, without a
# separate check-then-create step that could race with another process.
os.makedirs(precesseddir, exist_ok=True)

In [3]:
# Load the semicolon-separated geo reference table from the input directory
geo_data = pd.read_csv(os.path.join(inputdir, 'geo_reference.csv'), sep=';')

In [28]:
# Show the (rows, columns) dimensions of the loaded reference table
geo_data.shape

(3739, 10)

In [18]:
def create_adjacency_matrix(data):
    """Build a dense directed adjacency matrix from a reference table.

    Every distinct, non-missing value found in the ``iu_ac``, ``iu_nd_amont``
    and ``iu_nd_aval`` columns becomes one node. Nodes are numbered in order
    of first appearance when the three columns are stacked in that order.
    For each row two directed edges are set:

    * ``iu_nd_amont -> iu_ac`` (upstream to current)
    * ``iu_ac -> iu_nd_aval`` (current to downstream)

    Missing identifiers (NaN / None) are never treated as nodes. An edge is
    skipped when either of its endpoints is missing, so a row without
    ``iu_ac`` contributes no edges instead of raising.

    Args:
        data: pandas DataFrame containing the columns ``iu_ac``,
            ``iu_nd_amont`` and ``iu_nd_aval``.

    Returns:
        A ``torch.float32`` tensor of shape ``(N, N)`` where ``N`` is the
        number of distinct nodes; entry ``[i, j]`` is 1 when there is an edge
        from node ``i`` to node ``j`` and 0 otherwise.

    Side effects:
        Prints ``N`` to stdout.
    """
    id_columns = ['iu_ac', 'iu_nd_amont', 'iu_nd_aval']
    stacked_ids = pd.concat([data[col] for col in id_columns], ignore_index=True)

    # factorize numbers the distinct values in order of first appearance and
    # marks missing values with the sentinel code -1 (they get no node).
    codes, node_ids = pd.factorize(stacked_ids)

    num_nodes = len(node_ids)
    print(num_nodes)
    adjacency_matrix = torch.zeros(num_nodes, num_nodes, dtype=torch.float32)

    # Split the stacked codes back into one array per original column.
    num_rows = len(data)
    current = codes[:num_rows]
    upstream = codes[num_rows:2 * num_rows]
    downstream = codes[2 * num_rows:]

    # Set all edges of one direction with a single indexed assignment.
    for sources, targets in ((upstream, current), (current, downstream)):
        valid = (sources >= 0) & (targets >= 0)  # both endpoints present
        rows = torch.as_tensor(sources[valid], dtype=torch.long)
        cols = torch.as_tensor(targets[valid], dtype=torch.long)
        adjacency_matrix[rows, cols] = 1.0

    return adjacency_matrix

In [19]:
# Build the adjacency matrix from the loaded reference table; the call itself
# prints the number of nodes before returning.
adj_matrix =  create_adjacency_matrix(geo_data)
# Print the tensor (PyTorch abbreviates large tensors with "...")
print(adj_matrix)

4634
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [20]:
# Total number of entries in the adjacency matrix
total_elements = adj_matrix.numel()
# Count the non-zero entries directly; the zero count follows from the total
non_zero = int(torch.count_nonzero(adj_matrix).item())
zero_elements = total_elements - non_zero
# Sparsity is the fraction of entries that are zero
sparsity = zero_elements / total_elements

print(f"Sparsity: {sparsity:.4f}, total_elements:{total_elements} ,non_zero_num:{non_zero}")
# Number of distinct values per column (missing values count as one value)
print(geo_data['iu_ac'].nunique(dropna=False))
print(geo_data['iu_nd_amont'].nunique(dropna=False))

Sparsity: 0.9997, total_elements:21473956 ,non_zero_num:6696
3348
1790


In [27]:
# Convert the PyTorch tensor to a NumPy array
adj_matrix_np = adj_matrix.numpy()
# Write the array to a compressed .npz archive under the key "adj_matrix"
np.savez_compressed(
    os.path.join(precesseddir, "adjacency_matrix.npz"),
    adj_matrix=adj_matrix_np,
)