In [1]:
import osmnx as ox
import pandas as pd
import networkx as nx
from shapely.geometry import Point
from scipy.spatial import cKDTree
import geopandas as gpd

In [2]:
# Load the saved Chengdu road network and split it into node / edge GeoDataFrames
graph_path = "../data/chengdu_road_network.graphml"
G = ox.load_graphml(graph_path)
nodes, edges = ox.graph_to_gdfs(G)

In [3]:
# Report how many nodes and edges the road network contains
for label, frame in (("Number of nodes:", nodes), ("Number of edges:", edges)):
    print(label, len(frame))

Number of nodes: 106957
Number of edges: 258601


In [4]:
def read_csv_range(format_list, file_pattern = "../data/chengdu/201408{:02d}.csv"):
    """
    Read one CSV file per day and combine them into a single GeoDataFrame.

    Every file must provide ``Lng`` and ``Lat`` columns; they are converted to
    point geometries and the combined frame is tagged with CRS EPSG:4326.

    Parameters:
        format_list (iterable of int): Day values substituted into
            ``file_pattern``; one file is read per value, in order.
        file_pattern (str): Path template filled in with ``str.format(day)``,
            e.g. "../data/chengdu/201408{:02d}.csv".

    Returns:
        tuple: ``(gdf, n_days)`` where ``gdf`` is the combined
        gpd.GeoDataFrame (index reset to 0..n-1) and ``n_days`` is the number
        of files that were read.

    Raises:
        ValueError: If ``format_list`` is empty.
    """
    # Materialize once so generators/iterators work and len() is always valid.
    days = list(format_list)
    if not days:
        # Fail early with a clear message instead of deep inside pd.concat.
        raise ValueError("format_list must contain at least one day to read")

    frames = []
    for day in days:
        file_path = file_pattern.format(day)
        df = pd.read_csv(file_path)
        df['geometry'] = gpd.points_from_xy(df['Lng'], df['Lat'])
        frames.append(df)
    combined_df = pd.concat(frames, ignore_index=True)
    return gpd.GeoDataFrame(combined_df, geometry='geometry', crs="EPSG:4326"), len(days)

In [5]:
# Read and combine the per-day CSV files.
# The short list is used for quick runs; the full list covers days 3-23
# with day 7 left out.
format_list_simple = [3, 4, 5]
format_list_full = [3, 4, 5, 6] + list(range(8, 24))
gdf, n_days = read_csv_range(
    format_list_simple,
    file_pattern="../data/chengdu/201408{:02d}.csv",
)

In [6]:
# Compute the midpoint of every road segment (the point halfway along its geometry).
# GeoSeries.interpolate is vectorized, so no per-row Python lambda is needed.
edges['midpoint'] = edges.geometry.interpolate(0.5, normalized=True)
# (x, y) pairs in edge order; positions in this list line up with edges.iloc
midpoints = list(zip(edges.midpoint.x, edges.midpoint.y))

In [7]:
# Match each trajectory point to its nearest road midpoint using a KD-tree.
# NOTE(review): both inputs look like lon/lat coordinates, so the distances are
# presumably in degrees rather than metres -- confirm the edges' CRS.
tree = cKDTree(midpoints)
point_geoms = gdf.geometry
trajectory_points = list(zip(point_geoms.x, point_geoms.y))
# k=1: a single nearest neighbour per point; `indices` are positions into `midpoints`
distances, indices = tree.query(trajectory_points, k=1)

In [8]:
# Store the matching result on the trajectory data
gdf['matched_edge'] = indices
# Translate each positional match into the edge's index label. Reading the label
# straight from the index avoids building a whole row Series per trajectory
# point, which is what `edges.iloc[idx].name` did.
edge_labels = edges.index.tolist()
gdf['matched_road'] = gdf['matched_edge'].map(lambda pos: edge_labels[pos])

# Hour of day of every point, parsed from the 'Time' column
gdf['Hour'] = pd.to_datetime(gdf['Time']).dt.hour

In [9]:
# Traffic counting from trajectory points
def calculate_hourly_unique_traffic(group):
    """
    Keep only the rows where a trajectory moves onto a different road.

    The rows of ``group`` are split by their 'Hour' value. Within each hour
    the rows are ordered by 'Time', and a row is kept when its 'matched_road'
    differs from the previous row's (the first row of each hour is always
    kept). The kept rows carry an extra boolean 'road_change' column.

    Parameters:
        group (pd.DataFrame): Rows with 'Hour', 'Time' and 'matched_road' columns.

    Returns:
        pd.DataFrame: The kept rows of all hours, concatenated in hour order.
    """
    def _road_entries(hour_rows):
        # Order by time, then flag rows whose road differs from the preceding row.
        ordered = hour_rows.sort_values('Time')
        roads = ordered['matched_road']
        ordered['road_change'] = roads.shift() != roads
        return ordered[ordered['road_change']]

    return pd.concat([_road_entries(rows) for _, rows in group.groupby('Hour')])

In [10]:
# Per vehicle, keep only the rows where it enters a different road within an hour
unique_traffic_df = (
    gdf.groupby('VehicleNum')
    .apply(calculate_hourly_unique_traffic)
    .reset_index(drop=True)
)

# Start every road with a 24-slot profile of zeros (one slot per hour of day)
traffic_counts = {}
for road in edges.index:
    traffic_counts[road] = [0] * 24

# Accumulate, per hour, the number of road entries averaged over the days read
for hour, group in unique_traffic_df.groupby('Hour'):
    road_counts = group['matched_road'].value_counts()
    for road_id, count in road_counts.items():
        hourly_profile = traffic_counts[road_id]
        hourly_profile[hour] = hourly_profile[hour] + count / n_days

In [11]:
# Write the traffic values back onto the trajectory data
def get_hourly_traffic(row):
    """
    Look up the averaged traffic of the row's matched road at the row's hour.

    Reads the module-level ``traffic_counts`` mapping; a road that is missing
    from it is treated as having zero traffic in every hour.
    """
    zero_profile = [0] * 24
    hourly_profile = traffic_counts.get(row['matched_road'], zero_profile)
    return hourly_profile[row['Hour']]


gdf['traffic'] = gdf.apply(get_hourly_traffic, axis=1)

In [12]:
# Save the results
# errors='ignore' keeps this cell re-runnable: a second execution (e.g. after a
# failed write) no longer raises KeyError because the columns are already gone.
gdf.drop(columns=['geometry', 'matched_edge'], inplace=True, errors='ignore')  # drop temporary columns
output_file = "../data/traffic_average.csv"
gdf.to_csv(output_file, index=False)