The ground_truth notebook generates the ground-truth link travel times by running SUMO simulations on the ground-truth network.
The travel times are averaged across 100 SUMO runs with different random seeds to obtain more stable ground-truth data.



In [19]:
import numpy as np
import os
import pandas as pd
import xml.etree.ElementTree as ET
import csv
import random

In [20]:
#define support function PARSE_SUMO_OUTPUT

#parse the SUMO travel-time output XML file into a dataframe of average travel time per edge

def parse_sumo_output(output_xml_file, *, warmup_seconds=120, max_traveltime=1000,
                      raw_csv_path='travel_times.csv',
                      average_csv_path='average_travel_times.csv'):
    """Summarise a SUMO travel-time XML output as mean travel time per edge.

    The XML root is expected to contain ``<interval begin=... end=...>``
    elements, each holding ``<edge id=... traveltime=...>`` children. Every
    accepted (interval, edge) record is first written to ``raw_csv_path``; that
    file is then read back with pandas, so column dtypes are inferred by
    ``pd.read_csv`` (e.g. purely numeric edge ids come back as integers).

    Args:
        output_xml_file: Path (or file object) of the XML file to parse.
        warmup_seconds: Intervals whose ``begin`` is below this value are
            dropped. Defaults to 120, i.e. the first 2 minutes are excluded.
        max_traveltime: Exclusive upper bound on an accepted travel time.
            Records with ``traveltime`` <= 0 (including a missing attribute,
            which is read as 0) or >= this bound are dropped; this is meant to
            exclude intervals with no more traffic on the edge.
        raw_csv_path: Where the filtered per-interval records are written.
        average_csv_path: Where the per-edge averages are written.

    Returns:
        A DataFrame with columns ``edge_id`` and ``average_traveltime``, one
        row per edge that has at least one accepted record.
    """
    root = ET.parse(output_xml_file).getroot()

    csv_columns = ['interval_begin', 'interval_end', 'edge_id', 'traveltime']

    # Write explicitly as UTF-8: pd.read_csv below decodes as UTF-8 by default,
    # so relying on the platform's locale encoding could corrupt edge ids.
    with open(raw_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()

        for interval in root.findall('interval'):
            interval_begin = int(float(interval.get('begin')))
            interval_end = int(float(interval.get('end')))

            # Skip the whole warm-up interval at once instead of per edge.
            if interval_begin < warmup_seconds:
                continue

            for edge in interval.findall('edge'):
                traveltime = float(edge.get('traveltime', '0'))

                # Keep only plausible, strictly positive travel times.
                if 0 < traveltime < max_traveltime:
                    writer.writerow({
                        'interval_begin': interval_begin,
                        'interval_end': interval_end,
                        'edge_id': edge.get('id'),
                        'traveltime': traveltime,
                    })

    # Read the raw records back so pandas infers the column dtypes.
    df = pd.read_csv(raw_csv_path)

    # Unweighted mean over the accepted intervals of each edge.
    average_travel_times = df.groupby('edge_id')['traveltime'].mean().reset_index()
    average_travel_times.columns = ['edge_id', 'average_traveltime']

    # Persist the per-edge averages next to the raw records.
    average_travel_times.to_csv(average_csv_path, index=False)
    return average_travel_times


***Main Function***

In [None]:
#run SUMO simulation with ground truth network and 100 different seeds, and group the results by edge_id to a summary_average_travel_times dataframe  
all_outputs=pd.DataFrame()

for se in range(0,100,1):
    !sumo -c config.sumo.cfg --seed {se} 

    new_output=parse_sumo_output('travel_times.xml')
    print('new_output',new_output)
    all_outputs = pd.concat([all_outputs, new_output], axis=0)


summary_average_travel_times = all_outputs.groupby('edge_id')['average_traveltime'].mean().reset_index()
summary_average_travel_times.columns = ['edge_id', 'average_traveltime']



In [22]:
# Summary statistics of the per-run averages for every edge (count, mean, std,
# min, quartiles, max); the std column shows the spread across the runs.
per_edge_runs = all_outputs.groupby('edge_id')['average_traveltime']
per_edge_runs.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
edge_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12,100.0,38.538275,0.725114,36.24,38.089062,38.38375,38.911563,40.505
21,100.0,11.65395,0.249973,11.19375,11.448437,11.648125,11.819375,12.34375
23,100.0,24.094037,1.937568,20.2825,22.360312,24.42875,25.255625,28.32125
26,100.0,11.485014,0.279377,11.002857,11.2825,11.459286,11.626786,12.541429
29,100.0,10.776143,0.246438,10.285714,10.605714,10.763571,10.949643,11.382857
32,100.0,29.02685,3.371594,21.875,26.145,28.648125,31.35,38.3425
34,100.0,29.419425,2.817849,23.2125,27.37125,29.054375,31.646562,38.245
37,100.0,11.728486,0.266184,11.142857,11.55,11.712143,11.856071,12.608571
43,100.0,24.227,1.814742,19.645,23.113125,24.404375,25.2675,30.67
45,100.0,10.424188,0.223219,9.96625,10.273125,10.415625,10.574063,11.13625


In [24]:
# Collect the speed limit, the edge length and the free-flow travel time (ff_tt)
# of selected edges of the ground-truth network, for use in the bayesian experiment.
import pandas as pd
import xml.etree.ElementTree as ET

# Load the ground-truth network definition
tree = ET.parse('../3intersections_gt.net.xml')
root = tree.getroot()

# Edges whose attributes are needed
edge_ids = {"12", "23", "32", "34", "43", "54", "62", "73", "84", "92", "103", "114"}

# Pair every top-level <edge> with its first <lane> child, then keep one record
# (id, lane speed, lane length) for each edge of interest
lanes_by_edge = ((item.get("id"), item.find("lane")) for item in root.findall("edge"))
edge_data = [
    {"edge_id": ident, "speed": first_lane.get("speed"), "length": first_lane.get("length")}
    for ident, first_lane in lanes_by_edge
    if ident in edge_ids
]

# Tabulate the records
edge_df = pd.DataFrame(edge_data)

# XML attributes are strings: turn them into numbers (unparseable values become NaN)
for column in ('length', 'speed', 'edge_id'):
    edge_df[column] = pd.to_numeric(edge_df[column], errors='coerce')

# Free-flow travel time is the edge length divided by the speed limit
edge_df['ff_tt'] = edge_df['length'].div(edge_df['speed'])
edge_df

Unnamed: 0,edge_id,speed,length,ff_tt
0,103,13.89,131.67,9.479482
1,114,13.89,133.32,9.598272
2,12,13.89,141.75,10.205184
3,23,13.89,138.92,10.00144
4,32,13.89,138.92,10.00144
5,34,13.89,138.08,9.940965
6,43,13.89,138.08,9.940965
7,54,13.89,126.24,9.088553
8,62,13.89,140.67,10.12743
9,73,13.89,143.95,10.363571


In [25]:
# Attach speed limit, length and free-flow travel time to the averaged SUMO travel
# times; the inner join keeps only the edges present in both tables.
merged_df = summary_average_travel_times.merge(edge_df, how='inner', on='edge_id')
merged_df

Unnamed: 0,edge_id,average_traveltime,speed,length,ff_tt
0,12,38.538275,13.89,141.75,10.205184
1,23,24.094037,13.89,138.92,10.00144
2,32,29.02685,13.89,138.92,10.00144
3,34,29.419425,13.89,138.08,9.940965
4,43,24.227,13.89,138.08,9.940965
5,54,36.810613,13.89,126.24,9.088553
6,62,29.673514,13.89,140.67,10.12743
7,73,29.832043,13.89,143.95,10.363571
8,84,29.987471,13.89,143.39,10.323254
9,92,30.054543,13.89,132.51,9.539957


In [26]:
# Write the merged table to disk without the dataframe index column
merged_df.to_csv('summary_average_travel_times.csv', index=False)


In [27]:
# Display the merged table (travel-time averages joined with the edge attributes)
merged_df

Unnamed: 0,edge_id,average_traveltime,speed,length,ff_tt
0,12,38.538275,13.89,141.75,10.205184
1,23,24.094037,13.89,138.92,10.00144
2,32,29.02685,13.89,138.92,10.00144
3,34,29.419425,13.89,138.08,9.940965
4,43,24.227,13.89,138.08,9.940965
5,54,36.810613,13.89,126.24,9.088553
6,62,29.673514,13.89,140.67,10.12743
7,73,29.832043,13.89,143.95,10.363571
8,84,29.987471,13.89,143.39,10.323254
9,92,30.054543,13.89,132.51,9.539957
