In [1]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# 1. Input files (absolute paths on the author's machine).
edges_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data\edges_drive_physical_base.csv"
nodes_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data\nodes_drive_physical_base.csv"

# 2. Load the physical base network. Identifier columns are forced to `str`
#    in both tables so that edge endpoints (u, v) and node_id share one type.
df_edges = pd.read_csv(edges_path, dtype=dict(u=str, v=str))
df_nodes = pd.read_csv(nodes_path, dtype=dict(node_id=str))

# 3. Coordinate lookup table: node_id -> {'x': ..., 'y': ...}.
node_coords = (
    df_nodes[['node_id', 'x', 'y']]
    .set_index('node_id')
    .to_dict(orient='index')
)

print(">>> 正在计算路网中点并构建 KDTree 索引...")

# 4. Geometric midpoint (arithmetic mean of the two endpoints) of every edge.
#    The lookup is done for all edges at once instead of walking the table
#    row by row with iterrows(), which is very slow on large edge tables.
node_xy = df_nodes.set_index('node_id')[['x', 'y']]

# Fail early and name the offending IDs: an endpoint that is absent from the
# node table would otherwise surface as a bare KeyError in the middle of the run.
endpoint_ids = pd.concat([df_edges['u'], df_edges['v']])
missing_ids = endpoint_ids[~endpoint_ids.isin(node_xy.index)].unique()
if len(missing_ids) > 0:
    raise KeyError(
        f"{len(missing_ids)} edge endpoint ID(s) not found in the node table, "
        f"e.g. {list(missing_ids[:5])}"
    )

coords_u = node_xy.loc[df_edges['u'].to_numpy()].to_numpy()
coords_v = node_xy.loc[df_edges['v'].to_numpy()].to_numpy()

# One (mid_x, mid_y) row per edge, in the same order as df_edges.
midpoints_array = (coords_u + coords_v) / 2

# Kept for the cells that follow: the plain-list form of the midpoints and the
# mapping from KDTree row position to the edge (u, v, DataFrame index label).
midpoints = midpoints_array.tolist()
edge_map = [
    {'u': u, 'v': v, 'index': idx}
    for idx, u, v in zip(df_edges.index, df_edges['u'], df_edges['v'])
]

# 5. Spatial index over the edge midpoints.
spatial_index = KDTree(midpoints_array)

# 6. Store the midpoint coordinates on the edge table (used for visual checks
#    and written to disk by the export step).
df_edges['mid_x'] = midpoints_array[:, 0]
df_edges['mid_y'] = midpoints_array[:, 1]

# 7. Health check: report how many midpoints were indexed and their dimension.
divider = "-" * 40
report_lines = [
    divider,
    "KDTree 索引构建成功！",
    f"索引节点规模: {len(midpoints)} 个边中点",
    f"坐标维度: {midpoints_array.shape[1]}D",
    divider,
]
print("\n".join(report_lines))

# 8. Write the edge table (now carrying mid_x / mid_y) for the following steps.
output_dir = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data"
output_path = f"{output_dir}\\edges_with_midpoints.csv"
df_edges.to_csv(output_path, index=False)
print(f"包含中点信息的边表已保存: {output_path}")

>>> 正在计算路网中点并构建 KDTree 索引...
----------------------------------------
KDTree 索引构建成功！
索引节点规模: 89655 个边中点
坐标维度: 2D
----------------------------------------
包含中点信息的边表已保存: D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data\edges_with_midpoints.csv


In [2]:
import pandas as pd
import numpy as np
from scipy.spatial import KDTree

# 1. All paths hang off one data directory
#    (the "与改进" part of the folder name has been corrected here).
base_dir = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data"
aadt_path = f"{base_dir}\\MDOT_SHA_Annual_Average_Daily_Traffic_Baltimore.csv"
edges_mid_path = f"{base_dir}\\edges_with_midpoints.csv"

# 2. Load the inputs:
#    - the edge table with midpoints (output of step 3.1), endpoint IDs as strings
#    - the AADT traffic-count records
endpoint_dtypes = {'u': str, 'v': str}
df_edges = pd.read_csv(edges_mid_path, dtype=endpoint_dtypes)
df_aadt = pd.read_csv(aadt_path)

print(f">>> 正在读取 AADT 数据，记录条数: {len(df_aadt)}")

# 3. Rebuild the KDTree from the stored midpoint columns; row i of the tree
#    corresponds to row i of df_edges.
midpoint_columns = ['mid_x', 'mid_y']
midpoints_array = df_edges[midpoint_columns].to_numpy()
spatial_index = KDTree(midpoints_array)

# 4. Snap every AADT record to the edge whose midpoint is nearest.
#    The column names below are assumed ('LONGITUDE', 'LATITUDE', 'AADT') —
#    TODO confirm against the actual CSV header.
lon_col, lat_col, aadt_col = 'LONGITUDE', 'LATITUDE', 'AADT'

# NOTE(review): the lookup is a plain Euclidean nearest-neighbour search, so it
# is only meaningful if mid_x / mid_y are expressed in the same units as these
# two columns — confirm the coordinate system of the node table.
aadt_coords = df_aadt[[lon_col, lat_col]].values
# indices[i] is the df_edges row position nearest to AADT record i.
distances, indices = spatial_index.query(aadt_coords)

# 5. Conflict handling (MAX strategy): when several records land on the same
#    edge, keep the largest volume so the peak load is captured.
snapped = pd.DataFrame({
    'edge_idx': indices,
    'volume': df_aadt[aadt_col].to_numpy(),  # positional, aligned with `indices`
})

# KDTree.query reports a missing neighbour as an infinite distance with
# index == n. Writing such an index back with .at/.loc would silently append a
# bogus row to df_edges, so those records are discarded here.
has_neighbour = np.isfinite(distances) & (indices < len(df_edges))

# Records without a volume are dropped before aggregating. Python's max() is
# order-dependent when one operand is NaN, so a NaN seen first used to mask
# every later valid reading on that edge.
snapped = snapped[has_neighbour].dropna(subset=['volume'])

# sort=False keeps the edges in order of first appearance.
peak_by_edge = snapped.groupby('edge_idx', sort=False)['volume'].max()
edge_volume_map = peak_by_edge.to_dict()

# 6. Inject the volumes into the edge table; unmatched edges stay NaN.
#    df_edges was just read from CSV, so its index labels equal row positions.
df_edges['volume'] = np.nan
df_edges.loc[peak_by_edge.index, 'volume'] = peak_by_edge.to_numpy()

# 7. Health check: matched edge count, coverage ratio and the global peak.
matched_count = df_edges['volume'].notna().sum()
divider = "-" * 45
report_lines = [
    divider,
    "流量吸附完成！",
    f"成功匹配流量的路段数: {matched_count}",
    f"流量匹配覆盖率: {matched_count / len(df_edges):.2%}",
    f"全局最大流量 (Peak Volume): {df_edges['volume'].max():.2f}",
    divider,
]
print("\n".join(report_lines))

# 8. Write the edge table, now carrying the `volume` column, to disk.
output_path = f"{base_dir}\\edges_with_volume.csv"
df_edges.to_csv(output_path, index=False)
print(f"已生成包含流量属性的边表: {output_path}")

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\PyCode\\论文复现\\2025-D\\2507692\\论文复现与优化\\2025_Problem_D_Data\\edges_with_midpoints.csv'