In [1]:
from xml.dom.minidom import parse

import unittest
import os
import time
import xml.dom.minidom
import pandas as pd
import csv
import xml.etree.ElementTree as ET
import networkx as nx


In [2]:
# 1. Capture network information from ".xml" and save into ".csv"
# SUMO's  network contains connection information. This code extracts the following required information from it:
# 1). edge ID,
# 2). Features corresponding to edge ID (road length, speed limit, lanes number),
# 3). Two node IDs connected to edge ID,

def network_data_exploration(filename):
    # Define path
    path = os.path.abspath('./../../../Realistic_Event_Data_Generation_Procedure/') 
    # Define data path
    data_path = os.path.join(path,filename) 
    print('Data path is:', data_path) 
    
    # Open ".xml" and find data
    DOMTree = xml.dom.minidom.parse(data_path) 
    data = DOMTree.documentElement 
    
    # Get element list
    nodeList = data.getElementsByTagName("edge")
    print("Number of edges are: ", len(nodeList))
    
    # Define and initilzie explored data
    all_rows = []
    
    for node in nodeList: 
        # Get features of each edge
        edge_ID = node.getAttribute("id")
        node_start = node.getAttribute("from")
        node_end = node.getAttribute("to")
        priority = node.getAttribute("priority")
        # Get element list of edge
        subNodeList = node.getElementsByTagName("lane")
        
        # Initialize features
        speed = 0
        length = 0
        for subNode in subNodeList:
            # Get features of each lane
            speed += float(subNode.getAttribute("speed"))
        
            length += float(subNode.getAttribute("length"))
        speed_avg = speed / len(subNodeList)
        length_avg = length / len(subNodeList)
        lane_num = len(subNodeList)
            
        newRow = {"edge_id": edge_ID, "node_start": node_start, "node_end": node_end, "lane_num":lane_num, "speed": speed_avg, "length": length_avg, "priority": priority}
        all_rows.append(newRow)
              
    # Convert list of dictionaries to DataFrame
    df = pd.DataFrame(all_rows)
    return df
 
    
# Call function to export network information data
df = network_data_exploration("test.net.xml")
df.head()
df.to_csv('Manhattan_network_raw.csv', index=False)

Data path is: /Users/xuzizhuo/Desktop/Main Folder/My_works/routeSys_Github/Model_Training_on_Manual_Allocation_Data/SUMO_Simulation/test.net.xml
Number of edges are:  29497


In [3]:
# 2. Build a dictionary that maps SUMO road network information into int format and is readable by Simulation Algorithm:
# 1). Convert node ID of string structure to int structure: node_to_int,
# 2). Convert edge ID of string structure to int structure: edge_to_int,
# 3). Convert edge ID of int structure to string structure: int_to_edge,
# 4). Store node ID and edge ID of int structure in Manhattan_network_mapped.csv.

# Read data
df = pd.read_csv('Manhattan_network_raw.csv')

# Convert unique string ids of "node_start" and "node_end" to unique integers.
# Get a list of unique nodes
unique_nodes = pd.concat([df['node_start'], df['node_end']]).unique()
# Create a mapping of node string id to integer
node_to_int = {node: idx for idx, node in enumerate(unique_nodes)}
# Replace the string ids in the dataframe
df['node_start'] = df['node_start'].map(node_to_int)
df['node_end'] = df['node_end'].map(node_to_int)

# Convert "edge_id" to unique integers.
# Create a mapping of edge string id to integer
edge_to_int = {edge: idx for idx, edge in enumerate(df['edge_id'].unique())}
# Replace the string ids in the dataframe
df['edge_id'] = df['edge_id'].map(edge_to_int)

# Reverse edge_to_int
int_to_edge = {v: k for k, v in edge_to_int.items()}

# Save mapped dataframe
df.to_csv('Manhattan_network_mapped.csv', index=False)

In [4]:
# 3. Caputre connection information
def parse_sumo_xml(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    edges = {}
    connections = []

    for edge in root.findall('.//edge'):
        edge_id = edge.get('id')
        edges[edge_id] = {
            'from': edge.get('from'),
            'to': edge.get('to')
        }

    for connection in root.findall('.//connection'):
        from_edge = connection.get('from')
        to_edge = connection.get('to')
        direction = connection.get('dir')  # Turning direction
        connections.append((from_edge, to_edge, direction))

    return edges, connections

path = os.path.abspath('./../../../Realistic_Event_Data_Generation_Procedure/') 
xml_file = os.path.join(path,'test.net.xml') 
print('Data path is:', xml_file) 

edges, connections = parse_sumo_xml(xml_file)   

Data path is: /Users/xuzizhuo/Desktop/Main Folder/My_works/routeSys_Github/Model_Training_on_Manual_Allocation_Data/SUMO_Simulation/test.net.xml


In [5]:
# Save to .csv file

def save_connections_to_csv(connections, edge_to_int, filename):
    with open(filename, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['from_edge', 'to_edge', 'direction'])
        
        for from_edge, to_edge, direction in connections:
            from_edge_int = edge_to_int[from_edge]
            to_edge_int = edge_to_int[to_edge]
            writer.writerow([from_edge_int, to_edge_int, direction])

edges, connections = parse_sumo_xml(xml_file)
save_connections_to_csv(connections, edge_to_int, 'connections_to_directions.csv')


In [6]:
# 4. Build the mapped int edge id and its static features file

# Generate the dictionary edge_id_to_features of int structure edge ID -> lane_num, speed_limit, average_length,
# store the dictionary edge_id_to_features to a csv file for easy reading in C++.

# Read preprocessed data
df = pd.read_csv('Manhattan_network_mapped.csv')

# Initialize an empty dictionary to store information mapping integer edge_id to its features
edge_id_to_features = {}

for index, row in df.iterrows():
    # Stores lane_num, speed, and length information for the current edge_id
    edge_id_to_features[row['edge_id']] = {
        "lane_num": row['lane_num'],
        "speed": row['speed'],
        "length": row['length'],
        "edge_str": int_to_edge[row['edge_id']]
    }

# Save the dictionary as a CSV file
with open('edge_id_to_features.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["edge_id", "lane_num", "speed", "length", "edge_str"])
    for edge_id, features in edge_id_to_features.items():
        writer.writerow([edge_id, features['lane_num'], features['speed'], features['length'], features['edge_str']])

In [7]:
# 5. Map edge to node pairs & node pairs to edges

# Build the dictionaries nodes_to_edge_raw and edge_to_nodes_raw:
# 1). edge_to_nodes_raw: Maps from edge IDs of string structure to node IDs of string structure
# 2). nodes_to_edge_raw: Maps from node IDs of string structure to edge IDs of string structure

# Read .csv file
df = pd.read_csv('Manhattan_network_raw.csv')

# Initialize two dictionaries
edge_to_nodes_raw = {}  # edge_id 映射到 node_start 和 node_end
nodes_to_edge_raw = {}  # node_start 和 node_end 映射到 edge_id

for index, row in df.iterrows():
    edge_id = row['edge_id']
    node_start = row['node_start']
    node_end = row['node_end']

    # Create a mapping from edge_id to node_start and node_end
    edge_to_nodes_raw[edge_id] = (node_start, node_end)

    # Create a mapping from node_start and node_end to edge_id
    nodes_to_edge_raw[(node_start, node_end)] = edge_id

# 测试输出
print("Edge to Nodes Mapping:", edge_to_nodes_raw["-1004369132#0"])
print("Nodes to Edge Mapping:", nodes_to_edge_raw[(7480410399, 42437990)])

Edge to Nodes Mapping: (7480410399, 42437990)
Nodes to Edge Mapping: -1004369132#0


In [11]:
# 6. Map edge to node pairs & node pairs to edges

# Build the dictionaries nodes_to_edge_mapped and edge_to_nodes_mapped:
# 1). edge_to_nodes_mapped: Maps from edge IDs of int structure to node IDs of int structure
# 2). nodes_to_edge_mapped: Maps from node IDs of int structure to edge IDs of int structure

# Read .csv file
df = pd.read_csv('Manhattan_network_mapped.csv')

# Initialize two dictionaries
edge_to_nodes_mapped = {}  # edge_id 映射到 node_start 和 node_end
nodes_to_edge_mapped = {}  # node_start 和 node_end 映射到 edge_id

for index, row in df.iterrows():
    edge_id = row['edge_id']
    node_start = row['node_start']
    node_end = row['node_end']

    # Create a mapping from edge_id to node_start and node_end
    edge_to_nodes_mapped[edge_id] = (node_start, node_end)

    # Create a mapping from node_start and node_end to edge_id
    nodes_to_edge_mapped[(node_start, node_end)] = edge_id

In [8]:
# 7. Save the SUMO road network structure as a file readable by the Simulation Algorithm


# Paths for the input and output files
csv_file_path = 'Manhattan_network_mapped.csv'
txt_file_path1 = 'Manhattan_network_BJ.txt'
txt_file_path2 = 'Manhattan_network_min_Travel_Time.txt'

# Loading the CSV file
df = pd.read_csv(csv_file_path)

# Calculating unique nodes and edges
unique_nodes = pd.concat([df['node_start'], df['node_end']]).unique()
unique_edges = df['edge_id'].unique()

# The two integer values to be added at the beginning of each .txt file
int_val_1 = len(unique_nodes)
print("Length of unqiue nodes: ", len(unique_nodes))
int_val_2 = len(unique_edges)
print("Length of unique edges: ", len(unique_edges))

# Generating the first .txt file
with open(txt_file_path1, 'w') as txt_file1:
    # Writing the two integers to the TXT file
    txt_file1.write(f"{int(int_val_1)} {int(int_val_2)}\n")
    
    # Iterating over rows in the DataFrame to format and write each row according to specifications
    for index, row in df.iterrows():
        txt_file1.write(f"{int(row['node_start'])} {int(row['node_end'])} {int(row['edge_id'])} {round(row['length'],2)}\n")

# Generating the second .txt file
with open(txt_file_path2, 'w') as txt_file2:
    # Writing the two integers to the TXT file
    txt_file2.write(f"{int(int_val_1)} {int(int_val_2)}\n")
    
    # Iterating over rows in the DataFrame to format and write each row according to specifications
    for index, row in df.iterrows():
        travel_time = round(row['length'] / row['speed'],2)  # Calculating travel time as length_avg / speed_avg
        txt_file2.write(f"{int(row['node_start'])} {int(row['node_end'])} {travel_time}\n")


Length of unqiue nodes:  20859
Length of unique edges:  29497


In [9]:
# 8. Convert route data to a format readable by SUMO

# SUMO can generate trajectory data after the simulation is completed.
# The following code extracts the corresponding information of the trajectory data from the trajectory data.
# Capture trajectory data from ".xml" file to ".csv" file

def trajectory_information_capture(filename):
    # Get the XML file address
    path = os.path.abspath('./../../../Realistic_Event_Data_Generation_Procedure/') 
    data_path = os.path.join(path,filename) 
    
    # Open XML document
    DOMTree = xml.dom.minidom.parse(data_path) 
    # According to the XML document, get the object of the document element
    data = DOMTree.documentElement 
    
    # Get the node list
    nodeList = data.getElementsByTagName("vehicle")
    # Define the number of routes
    nodeLen = len(nodeList)
    print("Length of routes are: ", nodeLen)
    
    all_rows = []
    routeID_set = set()
    for node in nodeList: 
        # Get the current node attribute value
        route_ID = node.getAttribute("id")
        depart_time = node.getAttribute("depart")
        subNodeList = node.getElementsByTagName("route")
        
        for subNode in subNodeList:
            if len(subNodeList) != 1:
                print("Error.")
            
            # Get the current node attribute value
            route = subNode.getAttribute("edges")
            # Splitting a string
            ids = route.split()
            # Extract the first and last ID
            start_node = ids[0]
            end_node = ids[-1]
        
        newRow = {"route_id": route_ID, "depart_time": depart_time,
                  "start_edge": start_node, "end_edge": end_node, "route_by_edge": route}
        routeID_set.add(route_ID)
        all_rows.append(newRow)

    # Convert list of dictionaries to DataFrame
    df = pd.DataFrame(all_rows)
    print(f"Number of route is: {len(routeID_set)}")
    return df

df_trajectory = trajectory_information_capture("reRoute.trips.xml")
df_trajectory.to_csv('Manhattan_trajectory_raw.csv', index=False)

Length of routes are:  192495
Number of route is: 192495


In [12]:
# 9. Convert raw edge IDs constructed route into mapped node IDs
# E.g. raw_edge_1 raw_edge_2 ... -> mapped_node_1 mapped_node_2 mapped_node_3

# Read .csv file
df = pd.read_csv('Manhattan_trajectory_raw.csv')
df['route_by_mappped_node'] = None

for index, row in df.iterrows():
    # Split the route_by_edge string and apply the mapping
    node_pairs = [edge_to_nodes_mapped[edge_to_int[edge_id]] for edge_id in row['route_by_edge'].split(' ') if edge_id in edge_to_int]

    # Process node sequences to ensure that nodes are not repeated
    node_sequence = []
    for pair in node_pairs:
        # Make sure the node is converted to a string
        node_start = str(pair[0])
        node_end = str(pair[1])

        if not node_sequence or node_sequence[-1] != node_start:
            node_sequence.append(node_start)
        node_sequence.append(node_end)

    # Update information
    df.at[index, 'route_by_mappped_node'] = ' '.join(node_sequence)

# save to .csv file
df.to_csv('Manhattan_trajectory_mapped_node.csv', index=False)
df.head()

Unnamed: 0,route_id,depart_time,start_edge,end_edge,route_by_edge,route_by_mappped_node
0,2644,0.0,675967322#0,60813173#1,675967322#0 675967322#1 1032459844#0 103245984...,4868.0 18093.0 5489.0 5490.0 2599.0 2256.0 225...
1,2126,1.0,156994977#0,988987066#4,156994977#0 156994977#1 963575583#0 963575583#...,7321.0 7333.0 20074.0 20075.0 20076.0 20079.0 ...
2,3557,1.0,-1088320653#1,420358898#6,-1088320653#1 -1088320653#0 -438390724 -108832...,296.0 295.0 2127.0 294.0 293.0 292.0 291.0 290...
3,1030,2.0,5672977#0,421853810,5672977#0 5672977#1 5672977#2 993379229#3 9933...,5230.0 16941.0 16942.0 16943.0 20687.0 20688.0...
4,3047,2.0,464683316#3,542096278#3,464683316#3 464683316#4 1046750793#0 104675079...,7474.0 13025.0 5723.0 5724.0 13021.0 13019.0 1...


In [19]:
# 10. Read relistic event data 

# Define data path
path = os.path.abspath('./../../../Realistic_Event_Data_Generation_Procedure/') 
input_csv_path = os.path.join(path, 'TraCI_output_adjusted.csv')

# Read the CSV data
df_ETA = pd.read_csv(input_csv_path)
df = df_ETA[['Vehicle_ID', 'E_Length', 'Edge_ID', 'Speed_Net', 'Time', 'Travel_Time', 'Delay_Time', 'LowSpee_Time']]

# Add Wait_Sum to Travel_Time
df['Travel_Time'] = pd.to_numeric(df['Travel_Time'], errors='coerce') + pd.to_numeric(df['Delay_Time'], errors='coerce') + pd.to_numeric(df['LowSpee_Time'], errors='coerce')
# Travel time on tiny road segment should be zero.
df.loc[df['E_Length'] / df['Speed_Net'] < 1, 'Travel_Time'] = 0

# Find the average Travel_Time for different Edge_IDs and build a dictionary with the key being Edge_ID and the value being the average Travel_Time
average_travel_time_dict = df.groupby('Edge_ID')['Travel_Time'].mean().to_dict()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Travel_Time'] = pd.to_numeric(df['Travel_Time'], errors='coerce') + pd.to_numeric(df['Delay_Time'], errors='coerce') + pd.to_numeric(df['LowSpee_Time'], errors='coerce')


In [21]:
# 11. Verify: 
# Whether the route data contained in the trajectory file corresponds to the correctness of the route data,
# converted by TraCI

# Read trajectory .csv data
df = pd.read_csv('Manhattan_trajectory_raw.csv')

# Initialize dictionary
route_dict = {}

for index, row in df.iterrows():
    route_id = row['route_id']
    edge_ids = row['route_by_edge'].split(' ')  # 将 edge ids 拆分成列表
    route_dict[route_id] = edge_ids

# 输出词典
print(len(route_dict))

# Define data path
path = os.path.abspath('./../../../Realistic_Event_Data_Generation_Procedure/') 
data_path = os.path.join(path, 'TraCI_output_adjusted.csv')
# Read realistic event data
df = pd.read_csv(data_path)

# Compute 'Travel_Time' as the sum of the three columns
df['Travel_Time'] = pd.to_numeric(df['Travel_Time'], errors='coerce') + pd.to_numeric(df['Delay_Time'], errors='coerce') + pd.to_numeric(df['LowSpee_Time'], errors='coerce')

# Set the Travel_Time to 0 for rows where E_Length / Speed_Net is less than 1
df.loc[df['E_Length'] / df['Speed_Net'] < 1, 'Travel_Time'] = 0

# Convert 'Delay_Time', 'LowSpee_Time', 'Wait_Time' to 0 and 1
df[['Delay_Time', 'LowSpee_Time', 'Wait_Time']] = df[['Delay_Time', 'LowSpee_Time', 'Wait_Time']].applymap(lambda x: 0 if x == 0 else 1)

row_count = 0
    
# Initialize dictionary
vehicle_edge_time_dict = {}

for index, row in df.iterrows():
    vehicle_id = row['Vehicle_ID']
    edge_id = row['Edge_ID']
    
    # Building the inner dictionary
    time_data = {
        'Delay_Time': row['Delay_Time'],
        'LowSpee_Time': row['LowSpee_Time'],
        'Wait_Time': row['Wait_Time'],
        'Travel_Time': row['Travel_Time'],
        'Driving_Num': row['Driving_Num']
    }
    
    # If the vehicle_id already exists in the dictionary
    if vehicle_id in vehicle_edge_time_dict:
        # If edge_id has not been recorded yet, add it
        if edge_id not in vehicle_edge_time_dict[vehicle_id]:
            vehicle_edge_time_dict[vehicle_id][edge_id] = time_data
        else:
            # If edge_id already exists, you can choose to update the data or skip
            print(f"Edge {edge_id} 已存在于 vehicle {vehicle_id} 中")
    else:
        # If the vehicle_id does not exist, initialize a new dictionary
        vehicle_edge_time_dict[vehicle_id] = {edge_id: time_data}

    row_count += 1

    # Print process procedure
    if row_count % 5000000 == 0:
        print(f"已处理 {row_count} 行数据")

192484


In [23]:
# 12. Correctness check

# 1). Verify that two dictionaries have the same length
if len(route_dict) != len(vehicle_edge_time_dict):
    print(f"Error: two dic length are different - route_dict length: {len(route_dict)}, vehicle_edge_time_dict length: {len(vehicle_edge_time_dict)}")

# 2). Verify that the edge ids for each route_id contain duplicates
def check_duplicates(route_dict):
    for route_id, edge_ids in route_dict.items():
        if len(edge_ids) != len(set(edge_ids)):
            print(f"Error: route_id {route_id} exists duplicated edge ids")

# 3). Check route_dict for duplicate edge ids
print("Check if the edge ids in route_dict contain duplicates:")
check_duplicates(route_dict)

# 4). Verify that the edge ids in route_dict contain the edge ids in vehicle_edge_time_dict
print("Check if route_dict contains the edge ids from vehicle_edge_time_dict:")
for route_id, edge_dict in vehicle_edge_time_dict.items():
    if route_id in route_dict:
        route_edges = set(route_dict[route_id])
        vehicle_edges = set(edge_dict.keys())
        
        # 如果 route_dict 的 edge ids 不完全包含 vehicle_edge_time_dict 的 edge ids，打印错误
        if not vehicle_edges.issubset(route_edges):
            print(f"Error: route_id {route_id}'s edge ids do not all contained in route_dict .")
    else:
        print(f"Error: route_id {route_id} does not exit in route_dict")


验证1:词典长度一致
验证2.1 : 检查 route_dict 中的 edge ids 是否包含重复:
验证3: 检查 route_dict 是否包含 vehicle_edge_time_dict 的 edge ids:


In [24]:
# 13. Complete data


# Traverse route_id and process edge ids under each route_id
for route_id in route_dict:
    
    # Get edge ids in route_dict
    route_edges = route_dict[route_id]  # 这个是完整的 edge ids 列表
    
    # Check if the route_id exists in vehicle_edge_time_dict
    if route_id in vehicle_edge_time_dict:
        # Get the edge ids under the current route_id in vehicle_edge_time_dict
        vehicle_edges = list(vehicle_edge_time_dict[route_id].keys())
    else:
        # If the route_id does not exist in vehicle_edge_time_dict, initialize an empty nested dictionary
        print(f"Error: route_id {route_id} 不存在于 vehicle_edge_time_dict 中")
        vehicle_edge_time_dict[route_id] = {}
        vehicle_edges = []

    # Find missing edge ids: edge ids that are in route_dict but not in vehicle_edge_time_dict
    missing_edges = [edge_id for edge_id in route_edges if edge_id not in vehicle_edges]
    
    # For missing edge ids, complete the values ​​in vehicle_edge_time_dict
    for edge_id in missing_edges:
        # Initialized to 0 value, including all fields
        vehicle_edge_time_dict[route_id][edge_id] = {
            'Delay_Time': 0,
            'LowSpee_Time': 0,
            'Wait_Time': 0,
            'Travel_Time': 0,
            'Driving_Num': 1,
        }

    # 5. Keep order: Arrange the edge ids of vehicle_edge_time_dict in the order of route_dict
    # We first get the complete list of edge ids in order
    ordered_edges = {edge_id: vehicle_edge_time_dict[route_id][edge_id] for edge_id in route_edges}
    # Reassign the ordered edge ids back to vehicle_edge_time_dict
    vehicle_edge_time_dict[route_id] = ordered_edges

# Finally, check the result to confirm whether the edge ids of each route_id have been completed.
for route_id in vehicle_edge_time_dict:
    if not set(vehicle_edge_time_dict[route_id].keys()) == set(route_dict[route_id]):
        print(f"Error: route_id {route_id} 未能成功补全")

In [None]:
# 14. Generates both route data and time data

# In route.txt, the first value of each line is the node length, 
# followed by the start_node and the corresponding time bool information

# In time.txt, the first value of each line is the vehicle id, the length of the travel time value, 
# and the following travel time value as truth

# Sort vehicle_edge_time_dict by route_id
vehicle_edge_time_dict = dict(sorted(vehicle_edge_time_dict.items(), key=lambda item: item[0]))

# Open the route.txt and time.txt file for writing
with open('route.txt', 'w') as route_file, open('time.txt', 'w') as time_file:
    
    # Iterate over each route_id in vehicle_edge_time_dict
    for idx, (route_id, edge_time_dict) in enumerate(vehicle_edge_time_dict.items()):
        
        # Determine whether idx is equal to route_id
        if idx+1 != route_id:
            print(f"Mismatch: idx {idx} does not match route_id {route_id}")
        
        # Get a list of all edge ids under the route_id
        edge_ids = list(edge_time_dict.keys())
        
        # The length of edge ids written to the route.txt file
        route_file.write(f"{len(edge_ids) + 1} ")  # +1 because we need to write to second_node last
        
        # The length of the route_id and edge ids written to the time.txt file
        time_file.write(f"{len(edge_ids)} ")
        
        # Traverse each edge_id and get the corresponding time data
        for edge_id in edge_ids:
            # Use edge_to_int to convert edge_id to an integer
            if edge_id in edge_to_int:
                edge_int = edge_to_int[edge_id]
            else:
                # If edge_id is not in edge_to_int, print an error message and skip the edge_id
                print(f"Error: edge_id {edge_id} 不存在于 edge_to_int 词典中")
                continue  # Skip this edge_id
            
            # Use edge_to_nodes_mapped dictionary to convert edge_int to node pair
            if edge_int in edge_to_nodes_mapped:
                first_node, second_node = edge_to_nodes_mapped[edge_int]
            else:
                # If edge_int is not in edge_to_nodes_mapped, print an error message and skip the edge_id
                print(f"Error: edge_int {edge_int} 不存在于 edge_to_nodes_mapped 词典中")
                continue  # Skip this edge_id
            
            # Get the time data of the edge
            delay_time = edge_time_dict[edge_id]['Delay_Time']
            low_speed_time = edge_time_dict[edge_id]['LowSpee_Time']
            wait_time = edge_time_dict[edge_id]['Wait_Time']
            travel_time = edge_time_dict[edge_id]['Travel_Time']
            driving_num = edge_time_dict[edge_id]['Driving_Num']
            
            # Write first_node and its Delay_Time, LowSpee_Time, Wait_Time to the route.txt file
            route_file.write(f"{int(first_node)} {delay_time} {low_speed_time} {wait_time} {driving_num} ")
            
            # Write Travel_Time to time.txt file
            time_file.write(f"{travel_time} ")
        
        # The second_node of the last edge is written into the route.txt file. 
        # All three time values are assigned 0, and the last value is 1 as the minimum driving number.
        if edge_ids:
            route_file.write(f"{int(second_node)} 0 0 0 1")
        
        # Wrap after each route is processed
        route_file.write('\n')
        time_file.write('\n')

print("Data has been written to route.txt and time.txt")


In [28]:
# 15. Generate route_edge.txt file

# Open the route.txt file for writing
with open('route_edge.txt', 'w') as route_file:
    
    # Iterate over each route_id in vehicle_edge_time_dict
    for idx, (route_id, edge_time_dict) in enumerate(vehicle_edge_time_dict.items()):
        
        # Determine whether idx is equal to route_id
        if idx+1 != route_id:
            print(f"Mismatch: idx {idx} does not match route_id {route_id}")
        
        # Get a list of all edge ids under the route_id
        edge_ids = list(edge_time_dict.keys())
        
        # The length of edge ids written to the route.txt file
        route_file.write(f"{len(edge_ids)} ")
        
        # Traverse each edge_id and get the corresponding time data
        for edge_id in edge_ids:
            
            # Write first_node and its Delay_Time, LowSpee_Time, Wait_Time to the route.txt file
            route_file.write(f"{edge_id} ")
        
        # Wrap after each route is processed
        route_file.write('\n')

print("Data has been written to route_edge.txt")


Data has been written to route_edge.txt


In [30]:
# 16. Generate query.txt file

# Read csv file
df = pd.read_csv('Manhattan_trajectory_mapped_node.csv')
df = df.sort_values(by='route_id')

# Open a new text file for writing data
with open('query.txt', 'w') as file:
    for index, row in df.iterrows():
        # Extract the origin and destination nodes and convert them to integers
        node_sequence = row['route_by_mappped_node'].split()
        departure_node = int(float(node_sequence[0]))  # Convert to floating point first, then to integer
        destination_node = int(float(node_sequence[-1]))

        # Extract the departure time and convert it to an integer
        departure_time = int(row['depart_time'])

        # Write the extracted information into a file in the format
        file.write(f"{departure_node} {destination_node} {departure_time}\n")

print("Data has been written to query.txt")

Data has been written to query.txt
