In [None]:
import math
import pickle

import igraph
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd

from config import GRAPH_DIR, CPH_G_GRAPHML, DATA_DIR

In [None]:
# Street network stored as GraphML under GRAPH_DIR, loaded through OSMnx.
ox_graph = ox.load_graphml(GRAPH_DIR / CPH_G_GRAPHML)

# NOTE(review): pickle.load can execute arbitrary code while unpickling —
# only load this file when it comes from a trusted source.
with open("amager-polaris-3_igraph.pkl", mode="rb") as pickle_file:
    ig_graph: igraph.Graph = pickle.load(pickle_file)

In [None]:
def _describe_diff(diff):
    """Return the category label for a (NetworkX length - SUMO length) difference."""
    if diff < 0:
        return 'NetworkX < SUMO'
    if diff > 0:
        return 'NetworkX > SUMO'
    return 'NetworkX = SUMO'


# Index the igraph edges by their 'id' attribute for direct lookup.
ig_edges = {edge['id']: edge for edge in ig_graph.es}

data = []
missing_road_ids = []
for from_node, to_node, link_data in ox_graph.edges(data=True):
    osm_id = link_data['osmid']
    # Edges whose 'osmid' is a list are left out of the comparison.
    if isinstance(osm_id, list):
        continue

    ox_length = link_data['length']

    # The igraph edge ids are looked up by the string form of the OSM id.
    edge_key = str(osm_id)
    if edge_key not in ig_edges:
        # Remember ids that have no counterpart in the igraph graph.
        missing_road_ids.append(osm_id)
        continue
    ig_length = ig_edges[edge_key]['length']

    diff = ox_length - ig_length
    data.append({
        'osm_id': osm_id,
        'ox_len': ox_length,
        'ig_len': ig_length,
        'diff': diff,
    })

# One row per compared edge, plus a categorical label for the sign of the difference.
df = pd.DataFrame(data)
df['comparison'] = df['diff'].apply(_describe_diff)

In [None]:
# Tally each comparison category once; the same counts feed the chart and the printout.
comparison_counts = df['comparison'].value_counts()

ax = comparison_counts.plot(kind='bar')
ax.set_title('Count of NetworkX vs SUMO Edge Length Comparisons')
ax.set_xlabel('NetworkX length compared to SUMO length')
ax.set_ylabel('Number of road segments')
plt.xticks(rotation=0)  # keep the category labels horizontal
plt.savefig(DATA_DIR / 'networkx_vs_sumo_count.png', dpi=300, bbox_inches='tight')
plt.show()

# Print number of edges in each category
print(comparison_counts)

In [None]:
# Fixed-width bins (bin_width units wide) spanning the full range of observed differences.
bin_width = 1
bins = np.arange(math.floor(df['diff'].min()), math.ceil(df['diff'].max()) + bin_width, bin_width)

df['diff'].hist(bins=bins)
plt.title('Distribution of NetworkX - SUMO Edge Length Differences')
plt.xlabel('NetworkX length − SUMO length (meters)')
plt.ylabel('Frequency')
plt.axvline(0, color='black', linestyle='dashed', label='No difference')
plt.legend()
plt.savefig(DATA_DIR / 'networkx_vs_sumo_diff_hist.png', dpi=300, bbox_inches='tight')
plt.show()

# Count bins and print the top 15.
# The counts are computed with np.histogram so they use the same bin convention as the
# plotted histogram: every bin is [left, right), and the last bin also includes its right
# edge. Series.value_counts(bins=...) uses right-closed intervals instead, which would
# put values lying exactly on a bin edge (e.g. a difference of 0) into a different bin
# than the one drawn in the plot.
bin_counts, bin_edges = np.histogram(df['diff'].dropna(), bins=bins)
counts = pd.Series(bin_counts, index=pd.IntervalIndex.from_breaks(bin_edges, closed='left'))
top_15_bins = counts.nlargest(15)
print("Top 15 bins with the most counts:")
# Named `bin_interval` (not `bin`) so the builtin bin() is not shadowed in later cells.
for bin_interval, count in top_15_bins.items():
    print(f"Bin: {bin_interval}, Count: {count}")

In [None]:
# Box plot summarising the spread of the per-edge length differences.
ax = df.boxplot(column='diff')
ax.set_title('Summary of Edge Length Differences (NetworkX - SUMO)')
ax.set_ylabel('NetworkX length − SUMO length (meters)')
plt.savefig(DATA_DIR / 'networkx_vs_sumo_diff_box.png', dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Compute percentage difference relative to the NetworkX length.
# A zero NetworkX length divides to ±inf, which makes the histogram's automatic range
# non-finite and the plot call fail; mark those rows as undefined (NaN) so they are
# skipped by the histogram and by the threshold count below.
df['percent_diff'] = (
    (df['ox_len'] - df['ig_len']) / df['ox_len'] * 100
).replace([np.inf, -np.inf], np.nan)

df['percent_diff'].hist(bins=50)
plt.title('Percentage Difference: SUMO Length Relative to NetworkX')
plt.xlabel('((NetworkX − SUMO) / NetworkX) × 100')
plt.ylabel('Frequency')
plt.axvline(0, color='black', linestyle='dashed', label='Exact Match')
plt.legend()
plt.savefig(DATA_DIR / 'networkx_vs_sumo_percent_diff_hist.png', dpi=300, bbox_inches='tight')
plt.show()

# Print how many are at or above 90% (the message now states the threshold actually used)
print(f"Number of edges with >= 90% difference: {(df['percent_diff'] >= 90).sum()}")

In [None]:
# OSM IDs in more than 1 row
duplicate_osm_ids = int(df['osm_id'].value_counts().gt(1).sum())
print(duplicate_osm_ids)

# Preview of the comparison table. Kept as the cell's last expression so Jupyter
# renders it; a bare expression in the middle of a cell is silently discarded.
df.head()

In [None]:
# Second street network (Ravenna), loaded from GraphML for the speed-limit comparison.
ravenna_ox_graph = ox.load_graphml(GRAPH_DIR / "Ravenna.graphml")


def extract_speed_limits(graph):
    """Collect one rounded speed limit per edge that has a numeric ``maxspeed``.

    Args:
        graph: Object whose ``edges(data=True)`` yields ``(u, v, attrs)`` triples,
            where ``attrs`` is a dict of edge attributes.

    Returns:
        pandas.Series of ints, one entry per edge whose ``maxspeed`` could be parsed.
        A list-valued ``maxspeed`` contributes the rounded mean of its entries.
        Edges with a missing/empty ``maxspeed``, or with any entry that cannot be
        converted to a number, are skipped.
    """
    parsed_limits = []
    for _, _, attrs in graph.edges(data=True):
        raw_limit = attrs.get("maxspeed")
        if not raw_limit:
            continue

        # Treat a single value as a one-element list so both cases share one path.
        candidates = raw_limit if isinstance(raw_limit, list) else [raw_limit]
        try:
            numeric = [float(item) for item in candidates]
            parsed_limits.append(round(sum(numeric) / len(numeric)))
        except ValueError:
            # Non-numeric tags are ignored; one bad entry discards the whole edge.
            continue
    return pd.Series(parsed_limits)


# Parse the numeric `maxspeed` values of both networks.
ox_speeds = extract_speed_limits(ox_graph)
ravenna_speeds = extract_speed_limits(ravenna_ox_graph)

# Share of edges (in percent) at each speed limit, per network.
ox_pct = ox_speeds.value_counts(normalize=True).sort_index() * 100
ravenna_pct = ravenna_speeds.value_counts(normalize=True).sort_index() * 100

# pd.concat(axis=1) does not sort the combined index by default, so speed limits that
# occur in only one network would be appended after the others and the bars would be
# out of order. Sort explicitly so the x-axis runs from the lowest to the highest limit.
comparison_df = (
    pd.concat([ox_pct, ravenna_pct], axis=1, keys=["Amager (%)", "Ravenna (%)"])
    .fillna(0)  # a limit absent from one network has a 0 % share there
    .sort_index()
)

comparison_df.plot(kind="bar", figsize=(12, 6))
plt.title("Speed Limit Distribution Comparison: Amager vs Ravenna")
plt.xlabel("Speed Limit (km/h)")
plt.ylabel("Percentage of Road Segments")
plt.xticks(rotation=0)
plt.grid(True)
plt.tight_layout()
plt.savefig(DATA_DIR / "speed_limit_comparison.png", dpi=300, bbox_inches="tight")
plt.show()