In [1]:
import pandas as pd
from pandarallel import pandarallel
# Start the pandarallel worker pool used by the later `parallel_apply` call:
# 10 worker processes, with progress bars enabled.
pandarallel.initialize(nb_workers=10, progress_bar=True)

INFO: Pandarallel will run on 10 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [2]:
# Load the raw GPS points; only the columns needed to build per-user
# origin -> destination pairs are read.
traj = pd.read_csv('./input/traj_100000.csv', usecols=['lon', 'lat', 'uid', 'time'])
traj = traj.dropna()

# Parse the timestamps BEFORE sorting so that rows are ordered chronologically
# rather than by the lexical order of the raw strings.
start_time = pd.to_datetime('2023-07-12')
traj['time'] = pd.to_datetime(traj['time'])
traj = traj[traj['time'] >= start_time].sort_values(by=['uid', 'time']).copy()

# Minutes elapsed since `start_time`.
traj['timestamp'] = (traj['time'] - start_time).dt.total_seconds() / 60

# Drop points that fall exactly on midnight of any day after the first one.
minutes_per_day = 24 * 60
on_later_midnight = (traj['timestamp'] % minutes_per_day == 0) & (traj['timestamp'] / minutes_per_day > 0)
traj = traj[~on_later_midnight].copy()

# Pair every point with the same user's next point (the destination).
points_by_user = traj.groupby('uid')
traj['elat'] = points_by_user['lat'].shift(-1)
traj['elon'] = points_by_user['lon'].shift(-1)
# The last point of every user has no successor and is dropped here.
traj = traj.dropna()

# Keep one row per (uid, origin, destination) at 4-decimal precision.
# Comparing the rounded columns directly avoids the ambiguity of gluing the
# four numbers into one separator-less string, and avoids a row-wise apply.
rounded_od = traj[['lon', 'lat', 'elon', 'elat']].round(4).assign(uid=traj['uid'])
traj = traj[~rounded_od.duplicated(keep='first')].copy()
# Uncomment to debug on a small subset of users:
#traj = traj[traj['uid']<=10]


In [3]:


# Decimal places kept for each coordinate in the route-cache key.
round_num = 3


def _od_key(row):
    """Return 'lon,lat,elon,elat' for one row, each value rounded to `round_num` places."""
    parts = (row['lon'], row['lat'], row['elon'], row['elat'])
    return ','.join(str(round(value, round_num)) for value in parts)


# NOTE: despite the column name, 'md5' is a plain comma-separated text key,
# not a hash digest.
traj['md5'] = traj.apply(_od_key, axis=1)
# De-duplicate on the key so each origin/destination pair appears once.
traj = traj.drop_duplicates(subset=['md5'], keep='first')



In [4]:
import osmnx as ox
import warnings
# Silence every Python warning from here on.
warnings.filterwarnings('ignore')

# Road network loaded from a GraphML file.
G = ox.load_graphml('./data/shanghai_road2.graphml')

# Per-highway-type speeds handed to ox.add_edge_speeds below
# (km/h according to the OSMnx documentation).
hwy_speeds = {
    "residential": 10,
    "secondary": 15,
    'primary': 15,
    "tertiary": 20,
}

# Convert the network to node / edge GeoDataFrames.
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)

# Keep a single edge between any ordered pair of nodes (u, v), give it
# key 0, and restore the (u, v, key) index.
gdf_edges = (
    gdf_edges
    .reset_index()
    .drop_duplicates(subset=['u', 'v'], keep='first')
    .assign(key=0)
    .set_index(['u', 'v', 'key'])
)

# Rebuild the graph from the de-duplicated edges, reusing the graph-level
# attributes of the network that was loaded.
G = ox.graph_from_gdfs(gdf_nodes, gdf_edges, graph_attrs=G.graph)

# Restrict to the largest strongly connected component.
# NOTE(review): `ox.utils_graph` helpers may have moved in newer OSMnx
# releases - confirm against the installed version.
G = ox.utils_graph.get_largest_component(G, strongly=True)

# Attach a speed and then a travel time to every edge.
G = ox.add_edge_speeds(G, hwy_speeds)
G = ox.add_edge_travel_times(G)

import numpy as np
def find_travel_path(G, slon, slat, elon, elat, md5 , shortest=1, weight='travel_time'):
    """Route between two coordinates on the road network ``G``.

    The start and end coordinates are snapped to their nearest graph nodes and
    a route between those nodes is looked up.

    Parameters
    ----------
    G : graph
        Road network passed straight to the OSMnx routing helpers; its edges
        are read for the 'travel_time' and 'length' attributes.
    slon, slat : float
        Start longitude / latitude.
    elon, elat : float
        End longitude / latitude.
    md5 : str
        Caller-side key for this origin/destination pair. Not used by this
        function; kept for interface compatibility.
    shortest : int, default 1
        1 returns the single shortest path; a value k > 1 picks one route at
        random among the k shortest paths.
    weight : str, default 'travel_time'
        Edge attribute minimised by the path search.

    Returns
    -------
    dict
        Keys: 'direct' (always False), 'travel_route' (list of node ids),
        'travel_time' and 'length' (one value per edge of the route),
        'has_path' (True when the route has more than one node), and the
        input coordinates 'slon', 'slat', 'elon', 'elat'. When no route is
        found, 'travel_route', 'travel_time' and 'length' are empty lists and
        'has_path' is False.

    Raises
    ------
    ValueError
        If ``shortest`` is smaller than 1.
    """
    if shortest < 1:
        # Previously this fell through both branches and crashed later with
        # an UnboundLocalError on `travel_route`.
        raise ValueError(f"shortest must be >= 1, got {shortest!r}")

    # Snap both coordinates to their nearest graph nodes.
    orig = ox.distance.nearest_nodes(G, X=slon, Y=slat)
    dest = ox.distance.nearest_nodes(G, X=elon, Y=elat)

    if shortest == 1:
        # Single shortest path; may be None when the search finds no route.
        travel_route = ox.shortest_path(
            G, orig, dest, weight=weight)
    else:
        # Pick one of the k shortest paths at random.
        routes = list(ox.k_shortest_paths(
            G, orig, dest, k=shortest, weight=weight))
        if routes:
            travel_route = routes[np.random.choice(range(len(routes)))]
        else:
            travel_route = None

    if travel_route is None:
        # No route: report it through `has_path` instead of crashing while
        # reading edge attributes of a missing route.
        travel_route, travel_time, length = [], [], []
    else:
        # Per-edge travel time and length along the chosen route.
        travel_time = ox.utils_graph.get_route_edge_attributes(
            G, travel_route, attribute='travel_time')
        length = ox.utils_graph.get_route_edge_attributes(
            G, travel_route, attribute='length')

    # Bundle the route and its per-edge attributes.
    route = {
        'direct' : False,
        'travel_route': travel_route,
        'travel_time': travel_time,
        'length': length,
        'has_path': len(travel_route) > 1,
        'slon': slon,
        'slat': slat,
        'elon': elon,
        'elat': elat}
    return route
    

In [5]:
import os
import pickle

# Number of rows routed and pickled per batch.
gap = 10000

# Make sure the output directory exists before the first batch is written.
os.makedirs('./cache', exist_ok=True)

# Step through `traj` in batches of `gap` rows. Iterating over start offsets
# avoids the empty trailing batch the previous int(len/gap)+1 bound produced
# whenever len(traj) was an exact multiple of `gap` (or zero).
for start in range(0, len(traj), gap):
    i = start // gap  # batch number, used in the cache file name
    print(start, start + gap)
    # Work on a copy so adding the result column does not write into a slice
    # of `traj`.
    trajthis = traj.iloc[start:start + gap].copy()

    # Route every origin/destination pair of the batch in parallel.
    trajthis['result'] = trajthis.parallel_apply(lambda row: find_travel_path(G,row['lon'],row['lat'],row['elon'],row['elat'],row['md5']),axis=1)

    # Map each cache key to its route dict and persist the batch.
    route_dict = trajthis.set_index('md5')['result'].to_dict()
    with open(f'./cache/route_dict_{i}.pkl','wb') as file:
        pickle.dump(route_dict,file)

0 10000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

10000 20000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

20000 30000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

30000 40000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

40000 50000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

50000 60000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

60000 70000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

70000 80000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

80000 90000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

90000 100000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

100000 110000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

110000 120000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

120000 130000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

130000 140000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

140000 150000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

150000 160000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

160000 170000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

170000 180000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

180000 190000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

190000 200000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

200000 210000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

210000 220000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

220000 230000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

230000 240000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

240000 250000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

250000 260000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

260000 270000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

270000 280000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

280000 290000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

290000 300000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

300000 310000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

310000 320000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

320000 330000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

330000 340000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

340000 350000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

350000 360000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

360000 370000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

370000 380000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

380000 390000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

390000 400000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

400000 410000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

410000 420000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

420000 430000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

430000 440000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

440000 450000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

450000 460000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

460000 470000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

470000 480000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

480000 490000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

490000 500000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

500000 510000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

510000 520000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

520000 530000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

530000 540000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

540000 550000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

550000 560000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

560000 570000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

570000 580000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

580000 590000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

590000 600000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

600000 610000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

610000 620000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

620000 630000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

630000 640000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

640000 650000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

650000 660000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

660000 670000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

670000 680000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

680000 690000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

690000 700000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

700000 710000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

710000 720000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

720000 730000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

730000 740000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

740000 750000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

750000 760000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

760000 770000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

770000 780000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

780000 790000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

790000 800000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

800000 810000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

810000 820000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

820000 830000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

830000 840000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

840000 850000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

850000 860000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

860000 870000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

870000 880000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

880000 890000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

890000 900000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

900000 910000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

910000 920000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

920000 930000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

930000 940000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

940000 950000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

950000 960000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

960000 970000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

970000 980000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

980000 990000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

990000 1000000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1000000 1010000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1010000 1020000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1020000 1030000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1030000 1040000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1040000 1050000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1050000 1060000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1060000 1070000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1070000 1080000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1080000 1090000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1090000 1100000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1100000 1110000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1110000 1120000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1120000 1130000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1130000 1140000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1140000 1150000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1150000 1160000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=1000), Label(value='0 / 1000'))), …

1160000 1170000


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=504), Label(value='0 / 504'))), HB…