In [None]:
!apt-get install sumo sumo-tools sumo-doc
!pip install traci numpy torch matplotlib pandas


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  binfmt-support fastjar fonts-roboto-unhinted jarwrapper javascript-common libcoin80c
  libcollada-dom2.5-dp0 libfox-1.6-0 libgdal30 libglu1-mesa libjs-openlayers liblua5.2-0
  libopenscenegraph161 libopenthreads21 libpoppler-glib8 libproj22 proj-bin
Suggested packages:
  apache2 | lighttpd | httpd libopenal0a libsimage-dev libbullet2
The following NEW packages will be installed:
  binfmt-support fastjar fonts-roboto-unhinted jarwrapper javascript-common libcoin80c
  libcollada-dom2.5-dp0 libfox-1.6-0 libgdal30 libglu1-mesa libjs-openlayers liblua5.2-0
  libopenscenegraph161 libopenthreads21 libpoppler-glib8 libproj22 proj-bin sumo sumo-doc
  sumo-tools
0 upgraded, 20 newly installed, 0 to remove and 29 not upgraded.
Need to get 52.8 MB of archives.
After this operation, 270 MB of additional disk space will be used.
Get:1 http://archive.

In [None]:
import csv
import sumolib

def extract_and_save_network_data(network_file, output_file="network_data.csv"):
    """Export one CSV row per edge of a SUMO network.

    The network is parsed with ``sumolib.net.readNet`` and every edge returned
    by ``getEdges()`` is written as a row holding its ID, from/to node IDs,
    length, speed and lane count, preceded by a header row.

    Args:
        network_file: Path of the network file handed to ``sumolib``.
        output_file: Path of the CSV file to create (overwritten if present).

    Returns:
        None. A confirmation message is printed once the file is written.
    """
    column_names = [
        "Edge ID",
        "From Node",
        "To Node",
        "Length (m)",
        "Speed Limit (m/s)",
        "Number of Lanes",
    ]

    # Parse the network before touching the output file, so a failed read
    # never leaves a truncated CSV behind.
    network = sumolib.net.readNet(network_file)

    with open(output_file, mode='w', newline='') as csv_handle:
        table = csv.writer(csv_handle)
        table.writerow(column_names)
        table.writerows(
            [
                # A leading single quote keeps Excel from mangling the edge ID.
                f"'{road.getID()}",
                road.getFromNode().getID(),
                road.getToNode().getID(),
                road.getLength(),
                road.getSpeed(),
                len(road.getLanes()),
            ]
            for road in network.getEdges()
        )

    print(f"Data successfully saved to {output_file}")

# Path of the SUMO network file (.net.xml) to export; change this to point at
# your own network.
network_file = "/content/updated.net.xml"
# output_file is not passed, so the edge table goes to the function's default,
# network_data.csv.
extract_and_save_network_data(network_file)


Data successfully saved to network_data.csv


In [None]:
import csv

input_file = '/content/all_edge_data.csv'
# Absolute path, matching the location the loading cell below reads from, so the
# hand-off does not depend on the notebook's current working directory.
output_file = '/content/all_edge_data_cleaned.csv'

# Rows with fewer fields than this are treated as malformed and dropped.
MIN_FIELDS = 6


def _clean_edge_row(row):
    """Return a cleaned copy of one CSV row, or ``None`` if it must be dropped.

    A row is dropped when it:
      * has fewer than ``MIN_FIELDS`` fields (this also covers blank lines),
      * has an edge ID (field 1) starting with ":" once the leading single
        quote(s) are stripped, i.e. an internal edge,
      * has an average speed (field 2) or vehicle count (field 3) that cannot
        be parsed as a number, or a vehicle count that is infinite,
      * reports zero vehicles or zero average speed.

    Args:
        row: List of string fields as produced by ``csv.reader``.

    Returns:
        A new list equal to ``row`` with the quote-stripped edge ID in field 1,
        or ``None`` when the row is filtered out. ``row`` itself is not modified.
    """
    if len(row) < MIN_FIELDS:
        return None

    edge_id = row[1].lstrip("'")  # Remove the Excel-protection quote
    if edge_id.startswith(":"):
        return None  # Internal edge

    try:
        avg_speed = float(row[2])
        # Counts may be written as "3.0"; go through float before int.
        vehicle_count = int(float(row[3]))
    except (ValueError, OverflowError):
        # ValueError: non-numeric text or NaN count; OverflowError: "inf" count.
        return None

    if vehicle_count == 0 or avg_speed == 0:
        return None  # Zero-traffic record

    cleaned = list(row)
    cleaned[1] = edge_id
    return cleaned


# newline='' on both handles: the csv module needs it on the reader to parse
# quoted fields containing line breaks, and on the writer to avoid doubled
# line endings.
with open(input_file, 'r', newline='') as infile, open(output_file, 'w', newline='') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)

    header = next(reader, None)  # Copy header
    if header is None:
        # Fail with a clear message instead of a bare StopIteration.
        raise ValueError(f"{input_file} is empty; expected a header row")
    writer.writerow(header)

    for row in reader:
        cleaned_row = _clean_edge_row(row)
        if cleaned_row is not None:
            writer.writerow(cleaned_row)


In [None]:
import pandas as pd

# Load the cleaned edge data (expected to be the output of the cleaning cell above).
data = pd.read_csv("/content/all_edge_data_cleaned.csv")
# Preview up to the first 1000 rows. pandas abbreviates long frames when printed,
# so only the first and last few of those rows actually appear in the output.
print(data.head(1000))
# Summarize the frame: row count, column names, non-null counts and dtypes.
data.info()


     step edge_id  avg_speed  vehicle_count  occupancy  traffic_flow
0     100    -E10  20.978846              1   0.467942     20.978846
1     100    -E12  18.802583              1   0.289989     18.802583
2     100    -E13  18.020909              3   1.000383     54.062726
3     100    -E15  15.464651              2   0.556756     30.929301
4     100    -E16  20.160676              2   0.340483     40.321351
..    ...     ...        ...            ...        ...           ...
995  1800    -E25  19.300443              2   1.033310     38.600885
996  1800    -E26  16.052562              3   2.343570     48.157686
997  1800    -E27  17.381958              2   1.966246     34.763917
998  1800    -E28  14.596512              7   5.939328    102.175583
999  1800    -E29  19.204030              6   3.584383    115.224180

[1000 rows x 6 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6355 entries, 0 to 6354
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype