In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

In [17]:
# Read the space-separated Porto edge table; its first line holds the column names.
porto_path = 'Porto.txt'
df = pd.read_csv(porto_path, sep=' ', header=0)
df.head()

Unnamed: 0,eid,source,target,dir,capacity,speed_mph,cost_time,tt_user,tt_soc
0,20984,1,7361,1,950,20,0.579894,0.579894,0.579894
1,3780,1,27,1,950,20,0.044809,0.04481,0.044811
2,9968,2,2249,1,8100,40,0.439616,0.444568,0.44054
3,7447,3,9732,1,5400,40,0.082405,0.082409,0.082431
4,22016,3,8221,1,1900,30,0.674299,0.674387,0.707709


In [18]:
# Compare the distinct ids used as edge sources with those used as edge
# targets; `equal` is True only when both id sets are identical.
source_unique = pd.unique(df['source'])
target_unique = pd.unique(df['target'])
sorted_sources = np.sort(source_unique)
sorted_targets = np.sort(target_unique)
equal = np.array_equal(sorted_sources, sorted_targets)
equal

True

In [19]:
# Number of distinct source ids and distinct target ids.
source_unique.size, target_unique.size

(10326, 10326)

In [20]:
# Convert the speed limit from miles per hour to kilometres per hour.
MPH_TO_KMPH = 1.60934
df = df.assign(speed_kmph=df['speed_mph'] * MPH_TO_KMPH)

# Derive each segment's length (in kilometres) as speed multiplied by travel time.
# NOTE(review): this treats cost_time as hours -- confirm the unit used in Porto.txt.
df = df.assign(length_km=df['speed_kmph'] * df['cost_time'])

df.head()


Unnamed: 0,eid,source,target,dir,capacity,speed_mph,cost_time,tt_user,tt_soc,speed_kmph,length_km
0,20984,1,7361,1,950,20,0.579894,0.579894,0.579894,32.1868,18.664932
1,3780,1,27,1,950,20,0.044809,0.04481,0.044811,32.1868,1.442257
2,9968,2,2249,1,8100,40,0.439616,0.444568,0.44054,64.3736,28.299635
3,7447,3,9732,1,5400,40,0.082405,0.082409,0.082431,64.3736,5.304717
4,22016,3,8221,1,1900,30,0.674299,0.674387,0.707709,48.2802,32.555286


In [21]:
# Distinct km/h speed values, in order of first appearance.
df['speed_kmph'].drop_duplicates().to_numpy()

array([32.1868, 64.3736, 48.2802, 96.5604])

In [22]:
# Length of one cell in metres, used to turn a segment length into a cell count.
CELL_LENGTH_M = 7.5

# Speed level per speed limit. The lookup is keyed on the integer mph column
# instead of the derived float km/h values (20 mph == 32.1868 km/h, 30 == 48.2802,
# 40 == 64.3736, 60 == 96.5604): matching on exact float equality is fragile,
# and Series.map silently produces NaN for any key it cannot find.
speed_mapping = {20: 2, 30: 3, 40: 4, 60: 6}

df['speedLim'] = df['speed_mph'].map(speed_mapping)

# Fail loudly if the data contains a speed limit that has no level assigned,
# rather than carrying NaN (and a float column) into the later cells.
unmapped_speeds = df.loc[df['speedLim'].isna(), 'speed_mph'].unique()
if len(unmapped_speeds) > 0:
    raise ValueError(
        f"No speedLim defined for speed_mph values: {sorted(unmapped_speeds)}"
    )
df['speedLim'] = df['speedLim'].astype(int)

# Number of cells per segment: length in metres divided by the cell length,
# rounded to the nearest whole cell.
df['cellNum'] = (df['length_km'] * 1000 / CELL_LENGTH_M).round().astype(int)

df.head()

Unnamed: 0,eid,source,target,dir,capacity,speed_mph,cost_time,tt_user,tt_soc,speed_kmph,length_km,speedLim,cellNum
0,20984,1,7361,1,950,20,0.579894,0.579894,0.579894,32.1868,18.664932,2,2489
1,3780,1,27,1,950,20,0.044809,0.04481,0.044811,32.1868,1.442257,2,192
2,9968,2,2249,1,8100,40,0.439616,0.444568,0.44054,64.3736,28.299635,4,3773
3,7447,3,9732,1,5400,40,0.082405,0.082409,0.082431,64.3736,5.304717,4,707
4,22016,3,8221,1,1900,30,0.674299,0.674387,0.707709,48.2802,32.555286,3,4341


In [23]:
# Keep only what the following cells use: the edge endpoints plus the derived
# speed level and cell count.
edge_columns = ['source', 'target', 'speedLim', 'cellNum']
data = df.loc[:, edge_columns]
data.head()

Unnamed: 0,source,target,speedLim,cellNum
0,1,7361,2,2489
1,1,27,2,192
2,2,2249,4,3773
3,3,9732,4,707
4,3,8221,3,4341


In [25]:
# Build a directed graph with one edge per row of `data`, storing the speed
# level and cell count as the edge attributes `speed` and `length`.
# All edges are added in a single call from plain tuples, which avoids
# constructing a pandas Series for every row as DataFrame.iterrows() does.
# Note: a DiGraph holds at most one edge per (source, target) pair, so a
# repeated pair keeps only the attributes of its last row.
G = nx.DiGraph()
G.add_edges_from(
    (source, target, {'speed': speed_lim, 'length': cell_num})
    for source, target, speed_lim, cell_num in data[
        ['source', 'target', 'speedLim', 'cellNum']
    ].itertuples(index=False, name=None)
)

# True when every node can reach every other node along directed edges.
is_strongly_connected = nx.is_strongly_connected(G)
is_strongly_connected

True

In [None]:
def generate_new_edges_and_nodes(source, target, length, speed):
    """Split one segment ``source -> target`` around an intermediate node.

    The intermediate node id is the string ``"<source>000<target>"``, with both
    endpoints truncated to integers first.

    Args:
        source: Id of the segment's start node (anything ``int()`` accepts).
        target: Id of the segment's end node (anything ``int()`` accepts).
        length: Value stored as the intermediate node's length.
        speed: Value stored as the intermediate node's speed.

    Returns:
        A pair ``(new_edges, new_node_info)`` where ``new_edges`` is
        ``[[source, new_node], [new_node, target]]`` (endpoints passed through
        unchanged) and ``new_node_info`` is ``[new_node, length, speed]``.

    Note:
        The "000" separator does not make ids unique: (1, 10001) and
        (10001, 1) both yield "100010001", and (1, 2) yields "10002", which is
        indistinguishable from an original node with id 10002.
    """
    mid_id = "{}000{}".format(int(source), int(target))
    inbound = [source, mid_id]
    outbound = [mid_id, target]
    return [inbound, outbound], [mid_id, length, speed]

# Expand the network: every row of `data` (one road segment) becomes an
# intermediate node carrying the segment's cell count and speed level, plus
# two edges source -> intermediate -> target.
new_edges_list = []
new_nodes_list = []
segment_columns = ['source', 'target', 'cellNum', 'speedLim']
for source, target, cell_num, speed_lim in data[segment_columns].itertuples(index=False, name=None):
    new_edges, new_node_info = generate_new_edges_and_nodes(
        source, target, cell_num, speed_lim
    )
    new_edges_list.extend(new_edges)
    new_nodes_list.append(new_node_info)

# Each original node is written with length -1 and the speedLim of its first
# outgoing segment (first in row order). The lookup table is built in a single
# pass instead of filtering the whole frame once per node.
first_out_speed = {}
for source, speed_lim in zip(data['source'], data['speedLim']):
    first_out_speed.setdefault(source, speed_lim)

original_nodes = set(data['source']).union(set(data['target']))
nodes_without_out_edge = sorted(original_nodes.difference(first_out_speed))
if nodes_without_out_edge:
    raise ValueError(
        f"Nodes with no outgoing segment have no speed to inherit: {nodes_without_out_edge[:10]}"
    )
for node in original_nodes:
    new_nodes_list.append([node, -1, first_out_speed[node]])

# Edge endpoints mix numeric original ids and string intermediate ids; bring
# both to digit strings. int64 is requested explicitly because intermediate ids
# such as "1032600010326" do not fit in a 32-bit integer.
df_edges = pd.DataFrame(new_edges_list, columns=['source', 'target'])
df_edges['source'] = df_edges['source'].astype('int64').astype(str)
df_edges['target'] = df_edges['target'].astype('int64').astype(str)

df_nodes = pd.DataFrame(new_nodes_list, columns=['node_id', 'length', 'speed'])
df_nodes['node_id'] = df_nodes['node_id'].astype(str)

# `length` holds cellNum (already metres / 7.5) and `speed` holds speedLim
# (already the 2/3/4/6 level), so they are written unchanged. Dividing by 7.5
# and by 5 a second time, as this cell previously did, shrank every length and
# rounded the speeds down to 0 or 1.
df_nodes['length'] = df_nodes['length'].astype('int64')
df_nodes['speed'] = df_nodes['speed'].astype('int64')

# Save the processed data.
df_edges.to_csv('./resources/Porto_Edges.csv', index=False)
df_nodes.to_csv('./resources/Porto_Nodes.csv', index=False)