In [1]:
import pandas as pd
import os

# 1. 定义文件路径 (请根据您的本地环境确认路径)
base_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data"

file_final_model = os.path.join(base_path, "edges_final_model.csv")
file_step6_edges = os.path.join(base_path, "edges_drive_step6.csv")
output_file = os.path.join(base_path, "edges_fully_aligned.csv")

# 2. 加载数据集
# df_final: 第一阶段处理完的物理底座 (含 final_weight, volume_filled)
df_final = pd.read_csv(file_final_model)
# df_step6: 原始去噪后的数据 (含 highway, name, maxspeed)
df_step6 = pd.read_csv(file_step6_edges)

# 3. 提取语义特征列
# 我们使用 [u, v] 节点对作为唯一标识进行匹配
semantic_cols = ['u', 'v', 'osmid', 'highway', 'name', 'maxspeed', 'bridge', 'tunnel', 'junction']
df_semantics = df_step6[semantic_cols].drop_duplicates(subset=['u', 'v'])

# 4. 执行特征对齐 (Merge)
# 使用左连接，确保保留第一阶段处理的所有核心路段信息
df_aligned = pd.merge(df_final, df_semantics, on=['u', 'v'], how='left')

# 5. 保存对齐后的数据集
df_aligned.to_csv(output_file, index=False)

print(f"数据特征对齐完成！文件已保存至: {output_file}")
print(f"当前路网包含字段: {df_aligned.columns.tolist()}")

  df_step6 = pd.read_csv(file_step6_edges)


数据特征对齐完成！文件已保存至: D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data\edges_fully_aligned.csv
当前路网包含字段: ['u', 'v', 'length', 'travel_time', 'oneway', 'mid_x', 'mid_y', 'volume', 'volume_filled', 'lanes', 'capacity', 'free_flow_time', 'final_weight', 'osmid', 'highway', 'name', 'maxspeed', 'bridge', 'tunnel', 'junction']


In [3]:
import pandas as pd
import numpy as np
import networkx as nx
import os

# 1. 设置绝对路径
base_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data"
input_file = os.path.join(base_path, "edges_fully_aligned.csv")
output_file = os.path.join(base_path, "od_demand_matrix.csv")

# 2. 加载数据集
print("加载数据中...")
edges = pd.read_csv(input_file)

# 3. 识别起点质量 Oi (住宅区生成潜力)
# 筛选住宅路网，利用处理后的 volume_filled 量化流量生成规模
oi_data = edges[edges['highway'] == 'residential'].groupby('u')['volume_filled'].sum().reset_index()
oi_data.columns = ['node_id', 'oi_mass']

# 4. 识别终点质量 Dj (工业/港口吸引潜力)
# 关键词匹配识别目标区域，结合高等级道路 (primary/motorway/trunk)
dest_keywords = ['Port', 'Dundalk', 'Terminal', 'Industrial', 'Point']
is_dest_road = edges['name'].str.contains('|'.join(dest_keywords), case=False, na=False)
is_main_road = edges['highway'].isin(['motorway', 'trunk', 'primary'])

dj_data = edges[is_dest_road & is_main_road].groupby('v')['volume_filled'].sum().reset_index()
dj_data.columns = ['node_id', 'dj_mass']

# 5. 选取核心节点集以构建矩阵
# 选取生成能力前 100 的节点作为 Origins，吸引能力前 50 的节点作为 Destinations
top_origins = oi_data.nlargest(100, 'oi_mass')
top_destinations = dj_data.nlargest(50, 'dj_mass')
dest_nodes_set = set(top_destinations['node_id'].astype(int))

# 6. 构建图结构并计算成本矩阵 Cij
# 使用第一阶段生成的 final_weight 作为阻抗权重
G = nx.from_pandas_edgelist(edges, 'u', 'v', edge_attr='final_weight', create_using=nx.Graph())

od_results = []
print(f"开始高效计算 100 个起点的单源 Dijkstra 距离...")

for _, o_row in top_origins.iterrows():
    o_node = int(o_row['node_id'])
    
    try:
        # 核心优化：一次计算该起点到全网所有点的距离
        lengths = nx.single_source_dijkstra_path_length(G, source=o_node, weight='final_weight')
        
        # 只提取预定义的终点集合
        for d_node in dest_nodes_set:
            if d_node in lengths:
                cost = lengths[d_node]
                if cost > 0:
                    # 获取该终点对应的 Dj 质量
                    dj_val = top_destinations[top_destinations['node_id'] == d_node]['dj_mass'].values[0]
                    od_results.append({
                        'origin': o_node,
                        'destination': d_node,
                        'oi': o_row['oi_mass'],
                        'dj': dj_val,
                        'cost': cost
                    })
    except Exception as e:
        continue

# 7. 应用引力模型计算流量 Qij [cite: 973]
df_od = pd.DataFrame(od_results)

# 阻抗函数：使用平方反比 f(Cij) = cost^2
df_od['raw_qij'] = (df_od['oi'] * df_od['dj']) / (df_od['cost'] ** 2)

# 8. 全局标定 (Calibration)
# 通过 K 因子使得生成的 OD 总量与全网实际负载量级对齐
total_actual_volume = edges['volume_filled'].sum()
k_factor = total_actual_volume / df_od['raw_qij'].sum()
df_od['demand_qij'] = df_od['raw_qij'] * k_factor

# 9. 保存最终的 OD 需求矩阵
df_od.to_csv(output_file, index=False)

print(f"--- 步骤 1 完成 ---")
print(f"生成文件路径: {output_file}")
print(f"生成的有效 OD 对数量: {len(df_od)}")
print(f"系统总交通需求量估算: {df_od['demand_qij'].sum():.2f}")

加载数据中...
开始高效计算 100 个起点的单源 Dijkstra 距离...
--- 步骤 1 完成 ---
生成文件路径: D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data\od_demand_matrix.csv
生成的有效 OD 对数量: 5000
系统总交通需求量估算: 474347596.00


import pandas as pd
import numpy as np
import networkx as nx
import os

# 1. 定义路径
base_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data"
input_file = os.path.join(base_path, "edges_fully_aligned.csv")
nodes_file = os.path.join(base_path, "nodes_drive_step6.csv")
output_file = os.path.join(base_path, "od_demand_matrix.csv")

# 2. 加载对齐后的数据集
edges = pd.read_csv(input_file)
nodes = pd.read_csv(nodes_file)

# 3. 识别 Origin (Oi) 和 Destination (Dj) 的质量
# Oi: 基于住宅区道路 (residential) 的流量生成能力
residential_mask = edges['highway'] == 'residential'
oi_data = edges[residential_mask].groupby('u')['volume_filled'].sum().reset_index()
oi_data.columns = ['node_id', 'oi_mass']

# Dj: 基于工业/港口关键词和道路等级的吸引能力
# 关键词匹配：Port, Dundalk, Terminal, Industrial, Point
dest_keywords = ['Port', 'Dundalk', 'Terminal', 'Industrial', 'Point']
is_dest_road = edges['name'].str.contains('|'.join(dest_keywords), case=False, na=False)
is_main_road = edges['highway'].isin(['motorway', 'trunk', 'primary'])

dj_data = edges[is_dest_road & is_main_road].groupby('v')['volume_filled'].sum().reset_index()
dj_data.columns = ['node_id', 'dj_mass']

# 4. 选择核心质心 (Centroids) 以优化计算
# 选取生成能力前 100 的节点作为主要起点，吸引能力前 50 的节点作为主要终点
top_origins = oi_data.nlargest(100, 'oi_mass')
top_destinations = dj_data.nlargest(50, 'dj_mass')

# 5. 计算通行成本矩阵 (Cij)
# 构建 NetworkX 图
G = nx.from_pandas_edgelist(edges, 'u', 'v', edge_attr='final_weight', create_using=nx.Graph())

od_results = []
# 计算所有 O 点到 D 点的最短路径成本 (阻抗)
print("正在计算成本矩阵 Cij...")
for _, o_row in top_origins.iterrows():
    o_node = int(o_row['node_id'])
    for _, d_row in top_destinations.iterrows():
        d_node = int(d_row['node_id'])
        
        try:
            # 使用第一阶段的 final_weight 作为成本 Cij
            cost = nx.shortest_path_length(G, source=o_node, target=d_node, weight='final_weight')
            if cost > 0:
                od_results.append({
                    'origin': o_node,
                    'destination': d_node,
                    'oi': o_row['oi_mass'],
                    'dj': d_row['dj_mass'],
                    'cost': cost
                })
        except nx.NetworkXNoPath:
            continue

# 6. 应用引力模型计算流量 Qij
df_od = pd.DataFrame(od_results)

# 设定阻抗函数 f(Cij) = cost^2 (平方反比定律)
df_od['raw_qij'] = (df_od['oi'] * df_od['dj']) / (df_od['cost'] ** 2)

# 7. 标定常数 K 以匹配全网总流量规模
total_network_volume = edges['volume_filled'].sum()
k_factor = total_network_volume / df_od['raw_qij'].sum()
df_od['demand_qij'] = df_od['raw_qij'] * k_factor

# 8. 保存结果
df_od.to_csv(output_file, index=False)
print(f"OD 需求矩阵估算完成！文件已保存至: {output_file}")
print(f"模拟生成的总交通需求量: {df_od['demand_qij'].sum():.2f}")

In [7]:
import pandas as pd
import numpy as np
import networkx as nx
import os

# 1. 路径设置
base_path = r"D:\PyCode\论文复现与改进\2025-D\2507692\论文复现与优化\2025_Problem_D_Data"
input_file = os.path.join(base_path, "edges_fully_aligned.csv")
od_file = os.path.join(base_path, "od_demand_matrix.csv") # 之前生成的含 raw_qij 的文件
output_report = os.path.join(base_path, "od_calibration_comparison.txt")

# 2. 加载数据
edges = pd.read_csv(input_file)
df_od = pd.read_csv(od_file)
G = nx.from_pandas_edgelist(edges, 'u', 'v', edge_attr='final_weight', create_using=nx.Graph())

# 3. 方法 A：计算平均路径边数 (采样 100 对)
print("正在采样计算平均路径边数...")
sample_pairs = df_od.sample(100)
edge_counts = []
for _, row in sample_pairs.iterrows():
    try:
        path = nx.shortest_path(G, source=int(row['origin']), target=int(row['destination']), weight='final_weight')
        edge_counts.append(len(path) - 1)
    except: continue

avg_L = np.mean(edge_counts)
total_link_volume = edges['volume_filled'].sum()
q_total_a = total_link_volume / avg_L

# 4. 方法 B：社会统计学标定
# 巴尔的摩人口约 60 万，考虑通勤与物流，设定日出行总量为 300 万次
q_total_b = 3000000 

# 5. 方法 C：容量瓶颈约束
# 核心路段（如 I-95 或大桥）的典型日容量约为 100,000 辆
core_capacity = edges[edges['highway'].isin(['motorway', 'trunk'])]['capacity'].median()
# 设定最大单对需求不应超过核心路段容量的 15%
raw_max = df_od['raw_qij'].max()
q_total_c = (core_capacity * 0.15) * (df_od['raw_qij'].sum() / raw_max)

# 6. 计算 K 因子并生成报告
raw_sum = df_od['raw_qij'].sum()
k_a, k_b, k_c = q_total_a / raw_sum, q_total_b / raw_sum, q_total_c / raw_sum

report = f"""
OD 矩阵多维度标定报告
--------------------------------------------------
路网总流水 (Link Volume Sum): {total_link_volume:,.0f}
平均路径边数 (Avg Edge Count): {avg_L:.2f}
核心路段中值容量 (Core Capacity): {core_capacity:,.0f}

[方法 A - 网络周转修正] 建议总需求: {q_total_a:,.0f} 次 (K = {k_a:.4e})
[方法 B - 社会统计定额] 建议总需求: {q_total_b:,.0f} 次 (K = {k_b:.4e})
[方法 C - 容量极限约束] 建议总需求: {q_total_c:,.0f} 次 (K = {k_c:.4e})

分析建议：
1. 若 A > B: 说明存在显著的【过境/物流交通】。
2. 若 B > C: 说明大桥倒塌后的路网【无法承载】基本民生出行，是优化的核心动力。
--------------------------------------------------
"""
print(report)

# 保存最终修正后的矩阵 (采用方法 B 作为基准，参考 A/C 进行微调)
# 此处我们取一个平衡值或直接采用最稳健的 B
df_od['demand_qij'] = df_od['raw_qij'] * k_b 
df_od.to_csv(os.path.join(base_path, "od_demand_matrix_calibrated.csv"), index=False)

正在采样计算平均路径边数...

OD 矩阵多维度标定报告
--------------------------------------------------
路网总流水 (Link Volume Sum): 474,347,596
平均路径边数 (Avg Edge Count): 118.97
核心路段中值容量 (Core Capacity): 30,000

[方法 A - 网络周转修正] 建议总需求: 3,987,119 次 (K = 4.5741e+00)
[方法 B - 社会统计定额] 建议总需求: 3,000,000 次 (K = 3.4417e+00)
[方法 C - 容量极限约束] 建议总需求: 83,261 次 (K = 9.5519e-02)

分析建议：
1. 若 A > B: 说明存在显著的【过境/物流交通】。
2. 若 B > C: 说明大桥倒塌后的路网【无法承载】基本民生出行，是优化的核心动力。
--------------------------------------------------

