Edge List

In [None]:
import xml.etree.ElementTree as ET


# Read the network file and collect the `id` attribute of every <edge>
# element in document order. `alledges` is reused by later cells.
tree = ET.parse('/content/reduced.net.xml')
root = tree.getroot()
alledges = [edge.get('id') for edge in root.findall('.//edge')]

# Echo every id on its own line, then the complete list.
for id_attr in alledges:
    print(id_attr)

print(alledges)

FileNotFoundError: [Errno 2] No such file or directory: '/content/reduced.net.xml'

Network Graph

In [None]:
import xml.etree.ElementTree as ET
import numpy as np

def create_edge_matrix(xml_file):
    """Read the edges of an XML network file into two parallel arrays.

    Only ``<edge>`` elements that carry all three of ``id``, ``from`` and
    ``to`` are kept; the others are ignored.

    Args:
        xml_file (str): Path to the XML file.

    Returns:
        tuple: ``(edge_matrix, nodes_matrix)`` where row ``k`` of
        ``edge_matrix`` is ``[edge_id]`` and row ``k`` of ``nodes_matrix`` is
        ``[from_node, to_node]`` of the same edge.
    """
    root = ET.parse(xml_file).getroot()

    # (id, from, to) of every edge, then drop the incomplete records.
    records = [
        (element.get('id'), element.get('from'), element.get('to'))
        for element in root.findall('.//edge')
    ]
    complete = [record for record in records if None not in record]

    # Numpy arrays are easier to slice than nested lists downstream.
    edge_matrix = np.array([[edge_id] for edge_id, _, _ in complete])
    nodes_matrix = np.array([[source, target] for _, source, target in complete])

    return edge_matrix, nodes_matrix

# Example usage:
# Every one of the 989 samples uses the same network file, so the file is
# parsed once and the result is reused instead of re-reading the XML 989 times.
# NOTE: all entries reference the same (edge_matrix, nodes_matrix) tuple;
# copy the arrays before mutating the data of a single sample.
graph_matrix = [create_edge_matrix('/content/reduced.net.xml')] * 989


Adjacency Matrix

In [None]:
import xml.etree.ElementTree as ET
import numpy as np

def create_adjacency_matrix(xml_file):
    """Build a directed node-to-node adjacency matrix from an XML network file.

    Args:
        xml_file (str): Path to the XML file.

    Returns:
        tuple: ``(adjacency_matrix, nodes)``. ``nodes`` is the sorted list of
        node ids that appear as ``from`` or ``to`` of an edge, and
        ``adjacency_matrix[r, c]`` is 1 when some edge goes from ``nodes[r]``
        to ``nodes[c]`` (0 otherwise).
    """
    root = ET.parse(xml_file).getroot()

    # (from, to) of each edge that declares both endpoints.
    edges = [
        (element.get('from'), element.get('to'))
        for element in root.findall('.//edge')
        if element.get('from') is not None and element.get('to') is not None
    ]

    # Sorting gives every node a stable row/column position.
    nodes = sorted({endpoint for pair in edges for endpoint in pair})
    node_to_index = {node: position for position, node in enumerate(nodes)}

    size = len(nodes)
    adjacency_matrix = np.zeros((size, size), dtype=int)
    for source, target in edges:
        adjacency_matrix[node_to_index[source], node_to_index[target]] = 1

    return adjacency_matrix, nodes


# Example usage (assuming graph_matrix is defined as in the previous code)
# The same network file backs all 989 samples, so the matrix is built once
# and reused; a read or parse failure is therefore reported a single time and
# leaves `adjacency_matrices` empty.
# NOTE: all entries reference the same (matrix, nodes) tuple; copy before
# mutating the data of a single sample.
adjacency_matrices = []

try:
    adjacency_matrix, nodes = create_adjacency_matrix('/content/reduced.net.xml')
    adjacency_matrices = [(adjacency_matrix, nodes)] * 989

except FileNotFoundError:
    print("File '/content/reduced.net.xml' not found. Skipping.")

except ET.ParseError:
    print("Error parsing XML file '/content/reduced.net.xml'. Skipping.")


Incidence Matrix

In [None]:
# prompt: make incidence matrices of above

import xml.etree.ElementTree as ET
import numpy as np

def create_incidence_matrix(xml_file):
    """Build a node-by-edge matrix that marks the ``to`` node of every edge.

    Only ``<edge>`` elements carrying ``id``, ``from`` and ``to`` are used.
    Column ``k`` corresponds to the ``k``-th such edge in document order and
    holds a single 1 in the row of that edge's ``to`` node; the ``from`` node
    is not marked.

    Args:
        xml_file (str): Path to the XML file.

    Returns:
        tuple: ``(incidence_matrix, nodes, edges)`` where ``nodes`` is the
        sorted list of node ids and ``edges`` is the list of
        ``(from_node, to_node, edge_id)`` tuples in column order.
    """
    root = ET.parse(xml_file).getroot()

    edges = []
    for edge in root.findall('.//edge'):
        from_node = edge.get('from')
        to_node = edge.get('to')
        edge_id = edge.get('id')
        if from_node is not None and to_node is not None and edge_id is not None:
            edges.append((from_node, to_node, edge_id))

    nodes = sorted({node for from_node, to_node, _ in edges for node in (from_node, to_node)})
    node_to_index = {node: index for index, node in enumerate(nodes)}

    incidence_matrix = np.zeros((len(nodes), len(edges)), dtype=int)

    # The column index is the position in `edges` (skipped elements do not
    # consume a column).
    for column, (_, to_node, _) in enumerate(edges):
        incidence_matrix[node_to_index[to_node], column] = 1  # Incoming edge

    return incidence_matrix, nodes, edges


# Example usage: all 989 samples share the same network file, so the
# incidence matrix is built once and reused for every sample. A failure is
# reported a single time and leaves `incidence_matrices` empty.
# NOTE: all entries reference the same tuple; copy before mutating the data
# of a single sample.
incidence_matrices = []

try:
    incidence_matrix, nodes, edges = create_incidence_matrix('/content/reduced.net.xml')
    incidence_matrices = [(incidence_matrix, nodes, edges)] * 989
except FileNotFoundError:
    print("File '/content/reduced.net.xml' not found. Skipping.")
except ET.ParseError:
    print("Error parsing XML file '/content/reduced.net.xml'. Skipping.")
except Exception as e:  # Catch other potential errors during processing
    print(f"An error occurred while processing file '/content/reduced.net.xml': {e}")

In [None]:
# Shape of the first sample's incidence matrix (rows x columns).
print(incidence_matrices[0][0].shape)

(89, 212)


Lane to (Speed,Length) dictionary

In [None]:
import xml.etree.ElementTree as ET
import numpy as np

def edge_lane_info(xml_file):
    """Collect the lanes of every edge in an XML network file.

    Attribute values are kept exactly as read from the file (strings).

    Args:
        xml_file (str): Path to the XML file.

    Returns:
        dict: Maps each edge id to a list of
        ``{'lane_id': ..., 'speed': ..., 'length': ...}`` dictionaries, one
        per ``<lane>`` that has all three attributes. Edges without an ``id``
        are left out; edges without usable lanes map to an empty list.
    """
    root = ET.parse(xml_file).getroot()
    edge_data = {}

    for edge in root.findall('.//edge'):
        edge_id = edge.get('id')
        if edge_id is None:
            continue

        lanes = []
        for lane in edge.findall('.//lane'):
            record = {
                'lane_id': lane.get('id'),
                'speed': lane.get('speed'),
                'length': lane.get('length'),
            }
            # Keep the lane only when none of the attributes is missing.
            if all(value is not None for value in record.values()):
                lanes.append(record)
        edge_data[edge_id] = lanes

    return edge_data

# Print the complete edge -> lanes mapping of the network file.
print(edge_lane_info('/content/reduced.net.xml'))

{'-1278799627': [{'lane_id': '-1278799627_0', 'speed': '11.80', 'length': '64.76'}], '-1278799628#3': [{'lane_id': '-1278799628#3_0', 'speed': '11.80', 'length': '42.48'}, {'lane_id': '-1278799628#3_1', 'speed': '11.80', 'length': '42.48'}], '-1279773499#1': [{'lane_id': '-1279773499#1_0', 'speed': '7.08', 'length': '33.68'}], '-136030379#0': [{'lane_id': '-136030379#0_0', 'speed': '11.80', 'length': '81.01'}], '-136030379#1': [{'lane_id': '-136030379#1_0', 'speed': '11.80', 'length': '22.06'}], '-136030379#2': [{'lane_id': '-136030379#2_0', 'speed': '11.80', 'length': '22.84'}], '-136030379#3': [{'lane_id': '-136030379#3_0', 'speed': '11.80', 'length': '10.85'}], '-162174644#0': [{'lane_id': '-162174644#0_0', 'speed': '7.08', 'length': '154.91'}], '-162174644#1': [{'lane_id': '-162174644#1_0', 'speed': '7.08', 'length': '105.80'}], '-201536458#0': [{'lane_id': '-201536458#0_0', 'speed': '11.80', 'length': '60.85'}], '-201536458#3': [{'lane_id': '-201536458#3_0', 'speed': '11.80', 'len

Average Speed Length

In [None]:
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np

def edge_lane_info(xml_file):
    """Collect the lanes of every edge, with numeric speed and length.

    Args:
        xml_file (str): Path to the XML file.

    Returns:
        dict: Maps each edge id to a list of
        ``{'lane_id': str, 'speed': float, 'length': float}`` dictionaries,
        one per ``<lane>`` that has all three attributes. Edges without an
        ``id`` are left out; edges without usable lanes map to an empty list.
    """
    def _lane_records(edge):
        # One record per lane whose id, speed and length are all present.
        records = []
        for lane in edge.findall('.//lane'):
            lane_id, speed, length = lane.get('id'), lane.get('speed'), lane.get('length')
            if lane_id is None or speed is None or length is None:
                continue
            records.append({'lane_id': lane_id, 'speed': float(speed), 'length': float(length)})
        return records

    root = ET.parse(xml_file).getroot()
    return {
        edge.get('id'): _lane_records(edge)
        for edge in root.findall('.//edge')
        if edge.get('id') is not None
    }

def average_speed_length(edge_data):
    """Average the lane speeds and lane lengths of every edge.

    Args:
        edge_data (dict): Maps an edge id to a list of lane dictionaries that
            provide numeric ``'speed'`` and ``'length'`` entries.

    Returns:
        dict: Maps each edge id that has at least one lane to
        ``[mean_speed, mean_length]``. Edges with no lanes are omitted.
    """
    result = {}
    for edge_id in edge_data:
        speeds, lengths = [], []
        for lane in edge_data[edge_id]:
            speeds.append(lane['speed'])
            lengths.append(lane['length'])

        # An edge without lanes has nothing to average.
        if not (speeds and lengths):
            continue
        result[edge_id] = [np.mean(speeds), np.mean(lengths)]
    return result

# Example usage
# The network file is identical for all 989 samples, so it is parsed and
# averaged once and the result is reused for every sample.
# NOTE: the entries of each list are the same object; copy before mutating
# the data of a single sample.
edge_info = edge_lane_info('/content/reduced.net.xml')
edge_averages = average_speed_length(edge_info)

averages = [pd.DataFrame(edge_averages)] * 989
avg = [edge_averages] * 989



Routes and (Start,End) point

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd

def destination(xml_file, edge_ids=None):
    """Encode the first and last edge of the route in ``xml_file`` as a vector.

    The file is searched for ``<route>`` elements. When several are present,
    the last one with a non-empty ``edges`` attribute is used.

    Args:
        xml_file (str): Path to the routes XML file.
        edge_ids (list, optional): Ordered edge ids that define the layout of
            the returned vector. Defaults to the notebook-level ``alledges``.

    Returns:
        list[int]: One entry per id in ``edge_ids``: 1 for the route's first
        edge, -1 for its last edge and 0 otherwise. When the route starts and
        ends on the same edge, that edge is marked 1.

    Raises:
        ValueError: If the file contains no route with at least one edge.
    """
    if edge_ids is None:
        edge_ids = alledges

    root = ET.parse(xml_file).getroot()

    edges = None
    for route in root.findall('.//route'):  # Adjust the XPath if necessary
        route_edges = (route.get('edges') or '').split()
        if route_edges:
            edges = route_edges

    if edges is None:
        raise ValueError(f"No <route> with at least one edge found in '{xml_file}'")

    start, end = edges[0], edges[-1]

    d = []
    for edge_id in edge_ids:
        name = str(edge_id)
        if name == start:
            d.append(1)
        elif name == end:
            d.append(-1)
        else:
            d.append(0)

    return d

def extract_edges_exittimes(xml_file, edge_ids=None):
    """
    Computes, for every network edge, the time the route in an XML file spends on it.

    The ``depart`` time is read from the last ``<vehicle>`` element and the
    ``edges`` / ``exitTimes`` attributes from the last ``<route>`` element of
    the file. The time on the first edge of the route is its exit time minus
    ``depart``; for every later edge it is the difference between consecutive
    exit times. An edge that the route visits more than once gets the sum of
    its visits, so the result always has exactly one row per id in ``edge_ids``.

    Args:
        xml_file (str): Path to the XML file.
        edge_ids (list, optional): Ordered edge ids that define the rows of the
            result. Defaults to the notebook-level ``alledges``.

    Returns:
        list | None: ``[edge_id, duration]`` pairs in ``edge_ids`` order, where
        ``duration`` is a string (``'0'`` for edges that are not on the route).
        ``None`` if the file could not be read or processed; the error is
        printed in that case.
    """
    try:
        if edge_ids is None:
            edge_ids = alledges

        root = ET.parse(xml_file).getroot()

        depart = None
        for vehicle in root.findall('.//vehicle'):
            depart = vehicle.get('depart')

        found_routes = root.findall('.//route')  # Adjust the XPath if necessary
        if not found_routes:
            raise ValueError("no <route> element found")
        route = found_routes[-1]

        edges = (route.get('edges') or '').split()
        exit_times = [float(value) for value in (route.get('exitTimes') or '').split()]
        if len(exit_times) < len(edges):
            raise ValueError("route has fewer exitTimes than edges")
        if edges and depart is None:
            raise ValueError("no <vehicle> with a 'depart' attribute found")

        # Time spent per edge, accumulated over repeated visits.
        durations = {}
        previous_exit = float(depart) if edges else 0.0
        for edge, exit_time in zip(edges, exit_times):
            spent = exit_time - previous_exit
            durations[edge] = durations[edge] + spent if edge in durations else spent
            previous_exit = exit_time

        # One row per known edge; edges off the route keep the '0' placeholder.
        edges_data = []
        for edge_id in edge_ids:
            name = str(edge_id)
            if name in durations:
                edges_data.append([name, str(durations[name])])
            else:
                edges_data.append([edge_id, '0'])

        return edges_data

    except Exception as e:
        print(f"An error occurred: {e}")
        return None




# Example usage
routes_df=[]
destination_df=[]
routes=[]
for i in range(1,990):

  netxml_filepath = "/content/routes_WARB"+str(i)+".xml"

  # Parse each routes file once and feed both containers from the same
  # result instead of reading and processing the file twice.
  route_rows = extract_edges_exittimes(netxml_filepath)

  routes.append(route_rows)
  routes_df.append(pd.DataFrame(route_rows))
  destination_df.append(destination(netxml_filepath))


In [None]:
# Inspect one sample (index 700): its per-edge table and its start/end vector.
print(routes_df[700])
print(destination_df[700])

                 0  1
0      -1278799627  0
1    -1278799628#3  0
2    -1279773499#1  0
3     -136030379#0  0
4     -136030379#1  0
..             ... ..
207     98527089#0  0
208       98527101  0
209     98527102#0  0
210       98530229  0
211       98530243  0

[212 rows x 2 columns]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
# One {edge_id: duration} dictionary per routes file, index-aligned with
# `routes`. Each dictionary is appended exactly once, after it has been
# filled: appending inside the per-edge loop would add the same dictionary
# once per edge and shift every later sample to the wrong index.
routes_dict = []
for route_rows in routes:
  # `routes` holds None for a file that could not be processed; such a
  # sample gets an empty dictionary.
  routes_dict.append({row[0]: row[1] for row in (route_rows or [])})

print(routes_dict[7])


{'-1278799627': '0', '-1278799628#3': '0', '-1279773499#1': '0', '-136030379#0': '0', '-136030379#1': '0', '-136030379#2': '0', '-136030379#3': '0', '-162174644#0': '0', '-162174644#1': '0', '-201536458#0': '0', '-201536458#3': '0', '-201536458#4': '0', '-24585476#0': '0', '-24585476#1': '0', '-24585476#2': '0', '-257571774#0': '0', '-257571774#1': '0', '-257575697': '0', '-257575698': '0', '-257575722#1': '0', '-257575722#3': '0', '-258204735': '0', '-258989044#4': '0', '-258989046#3': '0', '-259194247#0': '0', '-259194247#1': '0', '-259194247#3': '0', '-259194247#6': '0', '-259194247#8': '0', '-259194248#1': '0', '-259194249#1': '9.0', '-259194249#2': '0', '-259194251': '6.0', '-259194252': '0', '-259194253#1': '8.0', '-259194254#1': '0', '-25996752#1': '0', '-25996752#3': '0', '-25996752#4': '0', '-25996752#5': '0', '-25996752#7': '0', '-260590855#0': '0', '-260590855#1': '0', '-333043524#0': '0', '-333043524#1': '0', '-333043524#2': '0', '-333043524#3': '0', '-333043524#8': '0', '-

In [None]:
!pip install torch_geometric



Graph Neural Network Dataset 1

In [None]:
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch_geometric
from torch_geometric.data import Data

# ... (Your existing code for adjacency_matrices, averages, and routes_df)

def create_gnn_dataset(adjacency_matrices, averages, routes_df):
    """Assemble one ``torch_geometric`` ``Data`` graph per sample (dataset 1).

    Besides its arguments the function reads these notebook-level variables,
    all indexed by sample number: ``incidence_matrices``, ``graph_matrix``,
    ``avg`` and ``routes_dict``; it also reads the edge id list ``alledges``.

    For every sample ``i``:
      * ``edge_index`` lists the nonzero entries of the adjacency matrix.
      * ``x`` is an all-zero ``(num_nodes, 1)`` placeholder feature matrix.
      * ``edge_attr`` has one row per ``edge_index`` column holding
        ``avg_speed + 100000 * avg_length`` of the first network edge that
        connects the two nodes.
      * ``is_z_nonzero`` is 1 for each node where the product of the
        incidence matrix with the per-edge route durations is nonzero, else
        0; ``z`` is that mask multiplied by 1000.

    Args:
        adjacency_matrices (list): ``(adjacency_matrix, node_ids)`` tuples.
        averages (list): Only its length is used (bounds the sample count).
        routes_df (list): Only its length is used (bounds the sample count).

    Returns:
        tuple: ``(dataset, X, Y)`` - the list of ``Data`` objects, the list of
        node-feature tensors ``x`` and the list of target tensors ``z``.

    Raises:
        ValueError: If an adjacency entry has no matching edge in
            ``graph_matrix`` (``edge_attr`` could not be aligned with
            ``edge_index``).
        KeyError: If the matching edge has no entry in ``avg``.
    """
    length_factor = 100000  # multiplier applied to the average length
    target_scale = 1000     # multiplier applied to the 0/1 node mask

    dataset = []
    X = []
    Y = []
    num_samples = min(len(adjacency_matrices), len(averages), len(routes_df))
    for i in range(num_samples):
        adj_matrix = adjacency_matrices[i][0]
        node_ids = adjacency_matrices[i][1]
        incidence_matrix = incidence_matrices[i][0]

        # Convert adjacency matrix to edge index. Going through np.array
        # avoids the slow tuple-of-arrays conversion (and its warning).
        edge_index = torch.tensor(np.array(np.nonzero(adj_matrix)), dtype=torch.long)

        # Node features: placeholder zeros, one feature per node.
        x = torch.zeros((adj_matrix.shape[0], 1), dtype=torch.float)

        # (from_node, to_node) -> id of the first edge connecting them.
        edge_id_matrix = graph_matrix[i][0]
        endpoint_matrix = graph_matrix[i][1]
        first_edge_between = {}
        for k in range(len(endpoint_matrix)):
            pair = (str(endpoint_matrix[k][0]), str(endpoint_matrix[k][1]))
            first_edge_between.setdefault(pair, str(edge_id_matrix[k][0]))

        # Edge features from the averages (avg_speed, avg_length)
        edge_attr = []
        for u, v in edge_index.t().tolist():
            pair = (str(node_ids[u]), str(node_ids[v]))
            if pair not in first_edge_between:
                raise ValueError(f"No edge from '{pair[0]}' to '{pair[1]}' in graph_matrix[{i}]")
            avg_speed, avg_length = avg[i][first_edge_between[pair]]
            edge_attr.append([avg_speed + length_factor * avg_length])
        edge_attr = torch.tensor(edge_attr, dtype=torch.float)

        # Per-edge route durations; an edge missing from the dictionary
        # counts as not travelled (0).
        y_list = [float(routes_dict[i].get(edge, 0)) for edge in alledges]
        y = torch.tensor(y_list, dtype=torch.float).reshape(-1, 1)

        # Node-level target: which nodes receive a nonzero duration.
        node_totals = torch.matmul(torch.tensor(incidence_matrix, dtype=torch.float), y)
        is_z_nonzero = (node_totals != 0).int()
        z = target_scale * is_z_nonzero.to(torch.float)

        data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, z=z, is_z_nonzero=is_z_nonzero)
        dataset.append(data)
        X.append(x)
        Y.append(z)

    return dataset, X, Y

# Create the GNN dataset
# The call unpacks three values: the list of graphs plus the X and Y lists.
gnn_dataset_1 , X , Y = create_gnn_dataset(adjacency_matrices, averages, routes_df)


  edge_index = torch.tensor(np.nonzero(adj_matrix), dtype=torch.long)
  z = torch.tensor(z,dtype=torch.float)


In [None]:
# Show the node-level target tensor `z` of one sample (index 7).
print(gnn_dataset_1[7].z)

tensor([[1000.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [1000.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [1000.],
        [   0.],
        [1000.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.],
        [   0.

GNN Dataset 2

In [None]:
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
!pip install torch_geometric
import torch_geometric
from torch_geometric.data import Data

# ... (Your existing code for adjacency_matrices, averages, and routes_df)

def create_gnn_dataset(adjacency_matrices, destination_df, routes_df):
    """Assemble one ``torch_geometric`` ``Data`` graph per sample (dataset 2).

    Besides its arguments the function reads these notebook-level variables:
    ``incidence_matrices``, ``graph_matrix`` and ``routes_dict`` (indexed by
    sample number) and the edge id list ``alledges``.

    For every sample ``i``:
      * ``edge_index`` lists the nonzero entries of the adjacency matrix.
      * ``x`` is an all-zero ``(num_nodes, 1)`` placeholder feature matrix.
      * ``edge_attr`` has one value per ``edge_index`` column: the
        start/end marker (1, -1 or 0) that ``destination_df[i]`` holds for
        the network edge connecting the two nodes. ``destination_df[i]`` is
        ordered like ``alledges``, so the marker is looked up through the
        edge id rather than by column position. When several edges connect
        the same node pair, the first nonzero marker is used.
      * ``z`` / ``is_z_nonzero`` are 1 for each node where the product of the
        incidence matrix with the per-edge route durations is nonzero, else 0
        (float and int versions respectively).

    Args:
        adjacency_matrices (list): ``(adjacency_matrix, node_ids)`` tuples.
        destination_df (list): Per sample, a list with one marker per entry
            of ``alledges``.
        routes_df (list): Only its length is used (bounds the sample count).

    Returns:
        list: The ``Data`` objects, one per sample.
    """
    dataset = []

    # Position of every edge id inside the per-sample destination vectors.
    edge_position = {str(edge_id): position for position, edge_id in enumerate(alledges)}

    num_samples = min(len(adjacency_matrices), len(destination_df), len(routes_df))
    for i in range(num_samples):
        adj_matrix = adjacency_matrices[i][0]
        node_ids = adjacency_matrices[i][1]
        incidence_matrix = incidence_matrices[i][0]

        # Convert adjacency matrix to edge index. Going through np.array
        # avoids the slow tuple-of-arrays conversion (and its warning).
        edge_index = torch.tensor(np.array(np.nonzero(adj_matrix)), dtype=torch.long)

        # Node features: placeholder zeros, one feature per node.
        x = torch.zeros((adj_matrix.shape[0], 1), dtype=torch.float)

        # (from_node, to_node) -> start/end marker of the connecting edge.
        edge_id_matrix = graph_matrix[i][0]
        endpoint_matrix = graph_matrix[i][1]
        marker_between = {}
        for k in range(len(endpoint_matrix)):
            pair = (str(endpoint_matrix[k][0]), str(endpoint_matrix[k][1]))
            position = edge_position.get(str(edge_id_matrix[k][0]))
            marker = destination_df[i][position] if position is not None else 0
            if marker_between.get(pair, 0) == 0:
                marker_between[pair] = marker

        # Edge features (start and end destination), aligned with edge_index.
        edge_attr = [
            marker_between.get((str(node_ids[u]), str(node_ids[v])), 0)
            for u, v in edge_index.t().tolist()
        ]
        edge_attr = torch.tensor(edge_attr, dtype=torch.float)

        # Per-edge route durations; an edge missing from the dictionary
        # counts as not travelled (0).
        y_list = [float(routes_dict[i].get(edge, 0)) for edge in alledges]
        y = torch.tensor(y_list, dtype=torch.float).reshape(-1, 1)

        # Node-level target: which nodes receive a nonzero duration.
        node_totals = torch.matmul(torch.tensor(incidence_matrix, dtype=torch.float), y)
        is_z_nonzero = (node_totals != 0).int()
        z = is_z_nonzero.to(torch.float)

        data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, z=z, is_z_nonzero=is_z_nonzero)
        dataset.append(data)

    return dataset

# Create the GNN dataset
gnn_dataset_2 = create_gnn_dataset(adjacency_matrices, destination_df, routes_df)

# Report the tensor shapes of one sample (index 800), in the order:
# node features, edge index, edge attributes, node targets.
for field in ("x", "edge_index", "edge_attr", "z"):
    print(getattr(gnn_dataset_2[800], field).shape)



  z = torch.tensor(z,dtype=torch.float)


torch.Size([89, 1])
torch.Size([2, 211])
torch.Size([211])
torch.Size([89, 1])







GRAPH NEURAL NETWORK

In [None]:
# prompt: write a code for gnn with 80:20 split and use BCE as loss function

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from sklearn.model_selection import train_test_split

# Define the GNN model
class GNN(torch.nn.Module):
    """Two stacked ``GCNConv`` layers followed by a sigmoid.

    Args:
        in_channels: Input size of the first convolution.
        hidden_channels: Output size of the first / input size of the second
            convolution.
        out_channels: Output size of the second convolution.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_attr):
        """Return per-node values in (0, 1).

        ``edge_attr`` is handed to both convolutions as their third
        positional argument (presumably GCNConv's edge weight - confirm
        against the torch_geometric documentation). Dropout is only active
        while the module is in training mode.
        """
        hidden = self.conv1(x, edge_index, edge_attr)
        hidden = F.dropout(F.relu(hidden), training=self.training)
        logits = self.conv2(hidden, edge_index, edge_attr)
        return torch.sigmoid(logits)  # Sigmoid for BCE Loss

# Uses gnn_dataset_2, the list of graphs built in the "GNN Dataset 2" cell.
# Split the dataset into training and testing sets (80:20 split);
# random_state fixes the split so it is the same on every run.
train_dataset, test_dataset = train_test_split(gnn_dataset_2, test_size=0.2, random_state=42)

# Create data loaders: 40 graphs per batch, served in a fixed order
# (shuffle=False) for both the training and the test set.
# NOTE(review): `torch_geometric` itself is imported in an earlier cell, not here.
train_loader = torch_geometric.loader.DataLoader(train_dataset, batch_size=40, shuffle=False)
test_loader = torch_geometric.loader.DataLoader(test_dataset, batch_size=40, shuffle=False)

# Initialize the model, optimizer, and loss function.
# The input width is taken from the node-feature matrix of the first graph.
# BCELoss is applied to the model output, which already went through a sigmoid.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GNN(in_channels=gnn_dataset_2[0].x.shape[1], hidden_channels=5, out_channels=1).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
criterion = torch.nn.BCELoss()

# Training loop
def train(verbose=False):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion``,
    ``train_loader`` and ``device``.

    Args:
        verbose (bool): When True, print the predictions and targets of every
            batch. Off by default because printing whole tensors for each
            batch floods the cell output.

    Returns:
        float: Mean of the batch losses (0.0 if the loader yields no batch).
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_attr)
        loss = criterion(out, data.z)  # the targets are stored under `z`
        if verbose:
            print("TRAIN>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
            print(out)
            print(data.z)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    return total_loss / num_batches if num_batches else 0.0

# Testing loop
def test(loader, verbose=False):
    """Compute the prediction accuracy of ``model`` over ``loader``.

    Uses the notebook-level ``model`` and ``device``. Predictions are the
    model outputs thresholded at 0.5 and are compared element-wise with the
    ``z`` targets of each batch.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr`` and ``z``.
        verbose (bool): When True, print the predictions and targets of every
            batch. Off by default because printing whole tensors for each
            batch floods the cell output.

    Returns:
        float: Fraction of target elements predicted correctly, in [0, 1]
        (0.0 if the loader yields no batch).
    """
    model.eval()
    correct = 0
    total = 0
    # Evaluation needs no gradients; skipping them saves memory and time.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            out = model(data.x, data.edge_index, data.edge_attr)
            if verbose:
                print("TEST>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
                print(out)
                print(data.z)
            pred = (out > 0.5).float()  # Threshold at 0.5
            correct += (pred == data.z).sum().item()
            # Count every compared element: dividing by the number of graphs
            # would yield values far above 1.
            total += data.z.numel()
    return correct / total if total else 0.0


# Training and evaluation: one optimisation pass per epoch, then the
# accuracy on the training set followed by the accuracy on the test set.
for epoch in range(1, 100):  # Adjust number of epochs
    loss = train()
    train_acc, test_acc = test(train_loader), test(test_loader)
    print(f'Epoch: {epoch}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        [0.1965],
        ...,
        [0.1965],
        [0.1965],
        [0.1965]], device='cuda:0', grad_fn=<SigmoidBackward0>)
tensor([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [1.]], device='cuda:0')
TEST>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tensor([[0.1965],
        [0.1965],
        [0.1965],
        ...,
        [0.1965],
        [0.1965],
        [0.1965]], device='cuda:0', grad_fn=<SigmoidBackward0>)
tensor([[1.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [1.]], device='cuda:0')
TEST>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
tensor([[0.1965],
        [0.1965],
        [0.1965],
        ...,
        [0.1965],
        [0.1965],
        [0.1965]], device='cuda:0', grad_fn=<SigmoidBackward0>)
tensor([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]], device='cuda:0')
TEST>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>