In [2]:
import pandas as pd
import networkx as nx

# 1. Location of the node table (absolute path on the original author's machine).
nodes_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data\nodes_drive_step6.csv"

# 2. Load the table with 'osmid' forced to str, so long integer ids are never
#    routed through a float and silently rounded.
df_nodes = pd.read_csv(nodes_path, dtype={'osmid': str})

# 3. Fresh directed graph; the following cells keep building on this object.
G = nx.DiGraph()

# 4. Insert every node together with its coordinates ('x' is documented by the
#    original notes as longitude, 'y' as latitude).
#    One bulk add_nodes_from call over the columns replaces the per-row
#    iterrows() loop; node order and attribute values are unchanged.
print(">>> 正在执行节点注入与几何属性绑定...")
G.add_nodes_from(
    (node_id, {'x': lon, 'y': lat})
    for node_id, lon, lat in zip(
        df_nodes['osmid'].astype(str), df_nodes['x'], df_nodes['y']
    )
)

# 5. Health-check report.
print("-" * 40)
print("节点注入完成！")
print(f"总计注入节点数: {G.number_of_nodes()}")

# Spot-check the first node to confirm the attributes were attached.
if G.number_of_nodes() > 0:
    sample_node = next(iter(G.nodes))
    sample_data = G.nodes[sample_node]
    print(f"抽查节点 ID: {sample_node}")
    print(f"节点坐标验证: x={sample_data.get('x')}, y={sample_data.get('y')}")

# Coordinate completeness self-check.
# Every node receives an 'x' and a 'y' key above, so testing only for key
# presence can never fail; a blank CSV cell arrives as NaN instead. pd.isna
# flags both a missing key (None) and a NaN value.
missing_coords = [
    n for n, d in G.nodes(data=True)
    if pd.isna(d.get('x')) or pd.isna(d.get('y'))
]
if not missing_coords:
    print("验证通过：所有节点均已成功绑定坐标属性。")
else:
    print(f"警告：发现 {len(missing_coords)} 个节点缺失坐标！")
print("-" * 40)

>>> 正在执行节点注入与几何属性绑定...
----------------------------------------
节点注入完成！
总计注入节点数: 36946
抽查节点 ID: 37293968
节点坐标验证: x=-76.7624289, y=39.2748695
验证通过：所有节点均已成功绑定坐标属性。
----------------------------------------


In [3]:
import pandas as pd
import networkx as nx

def _is_oneway(flag):
    """Interpret a raw 'oneway' cell as a boolean.

    Non-string values keep plain truthiness. Strings need special care because
    a non-empty string such as 'False' is truthy in Python; explicit false-like
    spellings are therefore mapped to False, every other string stays truthy.
    """
    if isinstance(flag, str):
        return flag.strip().lower() not in ('false', '0', 'no', '')
    return bool(flag)


# 1. Load the edge table with both endpoint ids forced to str.
# NOTE(review): the recorded cell output echoes this read_csv line as the
# source of a warning, which suggests at least one column has mixed types -
# confirm which column and pin its dtype.
edges_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data\edges_drive_step6.csv"
df_edges = pd.read_csv(edges_path, dtype={'u': str, 'v': str})

# 2. Set of every (u, v) pair present in the raw table, used below for
#    constant-time "does the reverse edge already exist?" lookups.
existing_edges = set(zip(df_edges['u'], df_edges['v']))

print(f">>> 正在执行边注入与物理属性建模，处理边数: {len(df_edges)}")

# 3. Reduce the table to one attribute dict per directed (u, v) pair.
#    A DiGraph stores a single edge per pair and add_edge overwrites existing
#    data, so feeding rows in directly makes the LAST parallel row win. For a
#    travel-time-weighted network the fastest parallel edge must win instead.
best_edges = {}


def _offer_edge(u, v, attrs):
    """Record attrs for (u, v) unless a faster candidate is already stored."""
    current = best_edges.get((u, v))
    if current is None or attrs['travel_time'] < current['travel_time']:
        best_edges[(u, v)] = attrs


for u, v, raw_length, raw_speed, oneway in zip(
    df_edges['u'].astype(str),
    df_edges['v'].astype(str),
    df_edges['length'],
    df_edges['maxspeed'],
    df_edges['oneway'],
):
    length = float(raw_length)

    # A. Guard the divisor. `not speed > 0` is also true for NaN, which the
    #    former `speed <= 0` test let through and turned into a NaN weight.
    speed = float(raw_speed)
    if not speed > 0:
        # Fallback value; the original note describes it as a very low 1 m/s
        # (the unit of 'maxspeed' is not verifiable from this cell).
        speed = 1.0

    # Travel-time impedance = length / speed.
    travel_time = length / speed
    attrs = {'length': length, 'travel_time': travel_time, 'oneway': oneway}

    # B. Forward edge.
    _offer_edge(u, v, attrs)

    # C. Reverse edge: only for two-way roads whose reverse direction is not
    #    listed explicitly in the raw table (explicit rows are handled by B).
    if not _is_oneway(oneway) and (v, u) not in existing_edges:
        _offer_edge(v, u, attrs)

# Insert the winners; dict order follows first appearance of each pair, so the
# edge insertion order matches a row-by-row add_edge pass.
G.add_edges_from((u, v, attrs) for (u, v), attrs in best_edges.items())

# 4. Health-check report.
print("-" * 40)
print("边注入完成！")
print(f"图中当前总边数: {G.size()}")
# Spot-check one edge; guarded so an empty edge table does not raise.
sample_edge = next(iter(G.edges(data=True)), None)
if sample_edge is not None:
    print(f"抽查边 {sample_edge[0]}->{sample_edge[1]} 属性: {sample_edge[2]}")
print("-" * 40)

  df_edges = pd.read_csv(edges_path, dtype={'u': str, 'v': str})


>>> 正在执行边注入与物理属性建模，处理边数: 90924
----------------------------------------
边注入完成！
图中当前总边数: 89655
抽查边 37293968->37293970 属性: {'length': 55.48935589611431, 'travel_time': 6.206863075627999, 'oneway': False}
----------------------------------------


In [4]:
import networkx as nx

# Step 1: enumerate the strongly connected components of the graph.
# Strong (not weak) connectivity is used so that no retained node can be
# entered without also being able to leave it again.
print(">>> 正在计算强连通分量并执行孤岛剔除...")
scc_components = list(nx.strongly_connected_components(G))

# Snapshot of the graph size before pruning, for the report below.
initial_node_count, initial_edge_count = G.number_of_nodes(), G.size()

# Step 2: keep an independent copy of the component with the most nodes.
largest_scc_nodes = max(scc_components, key=len)
G_final = G.subgraph(largest_scc_nodes).copy()

# Step 3: size after pruning and the share of nodes / edges that survived.
final_node_count, final_edge_count = G_final.number_of_nodes(), G_final.size()
node_retention = final_node_count / initial_node_count
edge_retention = final_edge_count / initial_edge_count

for report_line in (
    "-" * 40,
    "孤岛剔除完成！",
    f"原始节点数: {initial_node_count} -> 剔除后: {final_node_count}",
    f"剔除节点数: {initial_node_count - final_node_count}",
    f"节点保留率: {node_retention:.2%} (预期结果应 > 99%) [cite: 60, 167]",
    f"边保留率: {edge_retention:.2%}",
):
    print(report_line)

# Step 4: confirm the pruned graph really is strongly connected.
connectivity_message = (
    "最终路网状态: 强连通 (Strongly Connected) - 校验通过"
    if nx.is_strongly_connected(G_final)
    else "警告: 路网仍不完全连通，请检查步骤 2.2 的双向道展开逻辑"
)
print(connectivity_message)
print("-" * 40)

# Step 5: from here on, G refers to the pruned graph.
G = G_final

>>> 正在计算强连通分量并执行孤岛剔除...
----------------------------------------
孤岛剔除完成！
原始节点数: 36946 -> 剔除后: 36946
剔除节点数: 0
节点保留率: 100.00% (预期结果应 > 99%) [cite: 60, 167]
边保留率: 100.00%
最终路网状态: 强连通 (Strongly Connected) - 校验通过
----------------------------------------


In [5]:
import numpy as np
import networkx as nx

def _is_valid_coordinate(value):
    """Return True when value can be read as a finite number (rejects None/NaN)."""
    try:
        return bool(np.isfinite(float(value)))
    except (TypeError, ValueError):
        return False


def perform_data_audit(graph, original_df_edges, max_edge_ratio=1.5, speed_range=(5, 40)):
    """Run five sanity checks on the road-network graph and print a report.

    Args:
        graph: networkx-style directed graph. Edges are read for 'length' and
            'travel_time' attributes, nodes for 'x' and 'y'.
        original_df_edges: the raw edge table; only its len() is used.
        max_edge_ratio: the graph may hold at most this many edges per raw
            edge row before the edge-count heuristic fails.
        speed_range: inclusive (low, high) bounds for the mean implied speed
            (length / travel_time, reported as m/s).

    Returns:
        bool: True only when every check passes.
    """
    print(">>> 开始执行路网基座质量审计 (Data Audit)...")
    results = {}

    # 1. Edge-count heuristic. This only bounds the edge count relative to the
    #    raw table; it does not inspect individual edges for duplicates.
    results['no_duplicate_edges'] = graph.size() <= len(original_df_edges) * max_edge_ratio

    # 2. Unit consistency and numeric sanity of the edge weights.
    speeds = []
    travel_times = []
    for _, _, data in graph.edges(data=True):
        travel_time = data.get('travel_time', 0)
        travel_times.append(travel_time)
        if travel_time > 0:
            # Implied speed of this edge.
            speeds.append(data.get('length', 0) / travel_time)

    # The mean implied speed must fall inside speed_range (5-40 by default).
    avg_speed = np.mean(speeds) if speeds else 0
    min_speed, max_speed = speed_range
    results['unit_uniformity'] = min_speed <= avg_speed <= max_speed

    # Every travel_time must be finite and strictly positive.
    times = np.asarray(travel_times, dtype=float)
    results['no_zero_division'] = bool(np.all(np.isfinite(times) & (times > 0)))

    # 3. Strong connectivity. networkx raises on a graph without nodes, so an
    #    empty graph is reported as a failed check instead of crashing the audit.
    results['strongly_connected'] = (
        graph.number_of_nodes() > 0 and nx.is_strongly_connected(graph)
    )

    # 4. Coordinate completeness: both 'x' and 'y' must be present AND finite.
    #    Testing key presence alone would accept NaN coordinates.
    results['coords_complete'] = all(
        _is_valid_coordinate(d.get('x')) and _is_valid_coordinate(d.get('y'))
        for _, d in graph.nodes(data=True)
    )

    # Print the audit report.
    print("-" * 45)
    print(f"{'检查项目':<20} | {'结果':<10} | {'备注'}")
    print("-" * 45)
    print(f"{'无重边校验':<20} | {'通过' if results['no_duplicate_edges'] else '失败':<10} | 排除重复反向边 [cite: 170]")
    print(f"{'单位统一校验':<20} | {'通过' if results['unit_uniformity'] else '失败':<10} | 平均速度: {avg_speed:.2f} m/s ")
    print(f"{'无除零异常':<20} | {'通过' if results['no_zero_division'] else '失败':<10} | 通行时间均为有限正数 [cite: 172]")
    print(f"{'强连通校验':<20} | {'通过' if results['strongly_connected'] else '失败':<10} | 逻辑闭环, 无死路 [cite: 173]")
    print(f"{'坐标完备性':<20} | {'通过' if results['coords_complete'] else '失败':<10} | 100% 节点具备 x,y 坐标 [cite: 174]")
    print("-" * 45)

    return all(results.values())

# Run the audit on the current graph against the raw edge table.
is_audit_passed = perform_data_audit(G, df_edges)

# Print a one-line verdict matching the audit outcome.
audit_verdict = (
    ">>> 审计结论：路网底座质量极优，符合建模标准。 [cite: 60]"
    if is_audit_passed
    else ">>> 审计结论：发现潜在数据缺陷，请检查对应步骤。"
)
print(audit_verdict)

>>> 开始执行路网基座质量审计 (Data Audit)...
---------------------------------------------
检查项目                 | 结果         | 备注
---------------------------------------------
无重边校验                | 通过         | 排除重复反向边 [cite: 170]
单位统一校验               | 通过         | 平均速度: 10.93 m/s 
无除零异常                | 通过         | 通行时间均为有限正数 [cite: 172]
强连通校验                | 通过         | 逻辑闭环, 无死路 [cite: 173]
坐标完备性                | 通过         | 100% 节点具备 x,y 坐标 [cite: 174]
---------------------------------------------
>>> 审计结论：路网底座质量极优，符合建模标准。 [cite: 60]


In [6]:
import pandas as pd

# Directory that receives both exported tables.
output_dir = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data"

# 1. Node table: id plus the x / y coordinates stored on each node
#    (the original notes say these feed a later KDTree snapping step).
nodes_output_path = output_dir + "\\nodes_drive_physical_base.csv"
node_records = []
for node_id, node_attrs in G.nodes(data=True):
    node_records.append({'node_id': node_id, 'x': node_attrs['x'], 'y': node_attrs['y']})
nodes_table = pd.DataFrame(node_records)
nodes_table.to_csv(nodes_output_path, index=False)

# 2. Edge table: endpoints plus length, travel_time and oneway
#    (the original notes name travel_time as the path-search weight).
edges_output_path = output_dir + "\\edges_drive_physical_base.csv"
edge_records = []
for src, dst, edge_attrs in G.edges(data=True):
    edge_records.append({
        'u': src,
        'v': dst,
        'length': edge_attrs['length'],
        'travel_time': edge_attrs['travel_time'],
        'oneway': edge_attrs['oneway'],
    })
edges_table = pd.DataFrame(edge_records)
edges_table.to_csv(edges_output_path, index=False)

print(f">>> 数据基座构建完成并保存。节点: {G.number_of_nodes()}, 边: {G.size()}")

>>> 数据基座构建完成并保存。节点: 36946, 边: 89655
