In [2]:
import os
import pandas as pd
import osmnx as ox
import networkx as nx
from tqdm import tqdm

In [11]:
def _parse_bool(value):
    """Convert a GraphML attribute value to ``bool`` without mis-reading strings.

    ``bool("False")`` is ``True`` because any non-empty string is truthy, so
    textual booleans are matched explicitly. Anything that is not a recognised
    string (already-converted bools, lists, unknown text) falls back to
    ``bool(value)``.
    """
    if isinstance(value, str):
        text = value.strip().lower()
        if text in ("true", "1", "yes"):
            return True
        if text in ("false", "0", "no", ""):
            return False
    return bool(value)


def _is_missing(value):
    """Return True when ``value`` is ``None`` or a float NaN."""
    # The isinstance guard keeps strings/lists away from pd.isna, which would
    # otherwise return an array for list-like input.
    return value is None or (isinstance(value, float) and bool(pd.isna(value)))


def process_and_save_network():
    """Build (or load from cache) the bike network annotated with accident data.

    If the processed GraphML file already exists it is loaded and returned
    directly. Otherwise the original network is loaded, selected node/edge
    attributes are cast to fixed types, ``None``/NaN values of selected
    attributes are replaced with defaults, every accident record is snapped to
    its nearest edge, and the result is saved to the output path.

    Two edge attributes are added:

    * ``accident_score`` (float): running sum over the accidents snapped to
      the edge of ``-casualty_count * severity_weight``; the value is therefore
      zero or negative.
    * ``casualty_count`` (int): total casualties snapped to the edge.

    Returns:
        The processed graph. On a cache hit, ``accident_score`` and
        ``casualty_count`` are converted back to float/int so both code paths
        return the same attribute types.
    """
    # loading original road network
    input_path = '../data/london_bike_network.graphml'
    output_path = '../data/london_bike_network_accident_score.graphml'

    # If the processed file already exists, load it directly.
    # GraphML stores values as text, so the two attributes added below need
    # explicit dtypes; otherwise the cached graph would carry them as strings.
    if os.path.exists(output_path):
        print("load the processed network...")
        return ox.load_graphml(
            output_path,
            edge_dtypes={"accident_score": float, "casualty_count": int},
        )

    print("Process and save the network...")
    G = ox.load_graphml(input_path)

    # Node and edge attribute conversion
    node_types = {
        "street_count": int,
        "x": float,
        "y": float
    }

    edge_types = {
        'osmid': str,
        'access': str,
        'highway': str,
        'maxspeed': str,
        'name': str,
        # plain bool() would map the string "False" to True
        'oneway': _parse_bool,
        'reversed': _parse_bool,
        'length': float
    }

    for attr_name, dtype in tqdm(node_types.items(), desc="convert node attributes"):
        attrs = nx.get_node_attributes(G, attr_name)
        attrs_converted = {node: dtype(value) for node, value in attrs.items()}
        nx.set_node_attributes(G, attrs_converted, name=attr_name)

    for attr_name, dtype in tqdm(edge_types.items(), desc="convert edge attrbiutes"):
        attrs = nx.get_edge_attributes(G, attr_name)
        attrs_converted = {edge: dtype(value) for edge, value in attrs.items()}
        nx.set_edge_attributes(G, attrs_converted, name=attr_name)

    G = nx.relabel_nodes(G, {node: int(node) for node in G.nodes})  # relabel the node to an integer

    # Handle missing values and fill in default values.
    # Only attributes that are present but None/NaN are overwritten; an
    # attribute that is absent from a node/edge is left absent.
    default_node_attr = {
        'highway': '',
        'junction': '',
        'railway': ''
    }

    default_edge_attr = {
        'access': 'unknown',
        'maxspeed': '20 mph',
        'lanes': '1.5',
        'bridge': 'yes',
        'service': 'unknown',
        'junction': 'approach',
        'tunnel': 'yes',
    }

    for node, data in tqdm(G.nodes(data=True), desc="handle node missing values"):
        for attr, default_value in default_node_attr.items():
            if attr in data and _is_missing(data[attr]):
                G.nodes[node][attr] = default_value

    for u, v, k, data in tqdm(G.edges(keys=True, data=True), desc="handle edge missing values"):
        for attr, default_value in default_edge_attr.items():
            if attr in data and _is_missing(data[attr]):
                G.edges[u, v, k][attr] = default_value

    # load the accident dataset
    df_acc = pd.read_csv("../data/road_accident.csv")

    # every edge starts with a neutral score and no casualties
    nx.set_edge_attributes(G, 0.0, "accident_score")
    nx.set_edge_attributes(G, 0, "casualty_count")

    coords = list(zip(df_acc["Longitude"], df_acc["Latitude"]))

    # batch find the nearest edge
    nearest_edges = ox.distance.nearest_edges(
        G,
        X=[lon for lon, lat in coords],
        Y=[lat for lon, lat in coords],
        return_dist=False
    )

    severity_score_map = {"Slight": 0.1, "Serious": 0.5, "Fatal": 1.0}

    # Batch update scores and casualty counts.
    # `i` is used as a row label, which matches the default RangeIndex that
    # read_csv produces, so it lines up with the order of `nearest_edges`.
    for i, edge in enumerate(tqdm(nearest_edges, desc="add accident data")):
        try:
            casualty_count = df_acc.at[i, "_Casualty Count"]
            if pd.isna(casualty_count):
                casualty_count = 0
            else:
                casualty_count = int(casualty_count)

            severity = df_acc.at[i, "_Casualty Severity"]
            # unknown/missing severities contribute 0.0; the score is negated
            score = -casualty_count * severity_score_map.get(severity, 0.0)

            G.edges[edge]["accident_score"] += score
            G.edges[edge]["casualty_count"] += casualty_count

        except Exception as e:
            # best effort: report the bad record and continue with the rest
            print(f"Line {i} failed to process: {e}")

    ox.save_graphml(G, output_path)
    print(f"The processed network is saved to: {output_path}")
    return G

In [4]:
# Returns the cached processed graph if the output file already exists;
# otherwise builds it from the raw network and accident CSV and saves it.
G = process_and_save_network()

Process and save the network...


convert node attributes: 100%|███████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  2.79it/s]
convert edge attrbiutes: 100%|███████████████████████████████████████████████████████████| 8/8 [00:09<00:00,  1.16s/it]
handle node missing values: 100%|█████████████████████████████████████████| 327070/327070 [00:00<00:00, 2108164.32it/s]
handle edge missing values: 100%|██████████████████████████████████████████| 746627/746627 [00:00<00:00, 881709.93it/s]
add accident data: 100%|█████████████████████████████████████████████████████| 15589/15589 [00:00<00:00, 102713.43it/s]


The processed network is saved to: ../data/london_bike_network_accident_score2.0.graphml
