In [None]:
import pickle

import igraph
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd

from config import GRAPH_DIR, CPH_G_GRAPHML, DATA_DIR

In [None]:
# Street network stored as GraphML, read back through OSMnx.
ox_graph = ox.load_graphml(GRAPH_DIR / CPH_G_GRAPHML)

# igraph representation, restored from a pickle in the current working directory.
# NOTE(review): pickle.load can execute arbitrary code -- only open files you produced yourself.
# NOTE(review): the path is relative to the kernel's working directory, unlike the
# config-based paths used elsewhere in this notebook -- confirm this is intended.
with open("polaris-igraph.pkl", mode='rb') as pickle_file:
    ig_graph: igraph.Graph = pickle.load(pickle_file)

In [None]:
# Index the igraph edges by their 'id' attribute so each OSMnx edge can be matched by lookup.
# NOTE(review): if several igraph edges share the same 'id', only the last one survives
# in this dict -- confirm that ids are unique per edge.
ig_edges = {e['id']: e for e in ig_graph.es}

# One record per OSMnx edge that has a single osmid and a matching igraph edge.
data, missing_road_ids = [], []
for from_node, to_node, link_data in ox_graph.edges(data=True):
    osm_id = link_data['osmid']
    if isinstance(osm_id, list):
        # Edges carrying a list of osmids cannot be matched one-to-one; they are skipped.
        continue

    # igraph ids are looked up as strings, so the OSMnx id is stringified first.
    ig_edge = ig_edges.get(str(osm_id))
    if ig_edge is None:
        missing_road_ids.append(osm_id)
        continue

    ox_length = link_data['length']
    ig_length = ig_edge['length']
    diff = ox_length - ig_length
    data.append({'osm_id': osm_id, 'ox_len': ox_length, 'ig_len': ig_length, 'diff': diff})

# Surface the unmatched edges instead of collecting them silently.
print(f"{len(missing_road_ids)} OSMnx edges had no matching igraph edge")

# Passing the columns explicitly keeps the frame well-formed (and the following cells
# addressable) even when no edge could be matched.
df = pd.DataFrame(data, columns=['osm_id', 'ox_len', 'ig_len', 'diff'])

# diff = ox_len - ig_len, so a negative diff means the OSMnx length is the shorter one.
# Anything that is neither < 0 nor > 0 (including NaN) is labelled 'equal', as before.
df['comparison'] = np.select(
    [df['diff'] < 0, df['diff'] > 0],
    ['shorter', 'longer'],
    default='equal',
)

In [None]:
# Bar chart of how many rows fall into each comparison category.
category_counts = df['comparison'].value_counts()
ax = category_counts.plot(kind='bar')
ax.set_title('Count of ox_length vs ig_length')
ax.set_xlabel('ox_length compared to ig_length')
ax.set_ylabel('Number of road segments')

# Persist the figure before showing it.
ax.figure.savefig(DATA_DIR / 'ox_vs_ig_count.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Histogram of the absolute length differences (ox_len - ig_len), one bin per `bin_width`.
bin_width = 1

# Snap the bin edges to multiples of bin_width so that 0 is always an edge: otherwise the
# bin around the dashed zero line would mix negative and positive differences.
lowest_edge_idx = int(np.floor(df['diff'].min() / bin_width))
highest_edge_idx = int(np.ceil(df['diff'].max() / bin_width))
# Guarantee at least two edges (one bin) when all differences are identical.
highest_edge_idx = max(highest_edge_idx, lowest_edge_idx + 1)
bins = np.arange(lowest_edge_idx, highest_edge_idx + 1) * bin_width

df['diff'].hist(bins=bins)
plt.title('Distribution of ox_length - ig_length length differences')
plt.xlabel('ox_length - ig_length')
plt.ylabel('Frequency')
# Reference line at "no difference".
plt.axvline(0, color='black', linestyle='dashed')
plt.savefig(DATA_DIR / 'ox_vs_ig_diff_hist.png')
plt.show()

In [None]:
# Box plot summarising the spread of the 'diff' column.
box_ax = df.boxplot(column='diff')
box_ax.set_title('Summary of Length Differences (ox_length - ig_length)')
box_ax.set_ylabel('Difference in length')

# Write the figure to disk, then render it.
box_ax.figure.savefig(DATA_DIR / 'ox_vs_ig_diff_box.png')
plt.show()

In [None]:
# Percentage difference relative to the OSMnx length: ((ox - ig) / ox) * 100.
# Rows with ox_len == 0 would divide by zero and yield +/-inf, which the histogram's
# automatic range detection rejects; mask them to NaN so they are simply left out.
nonzero_ox_len = df['ox_len'].where(df['ox_len'] != 0)
df['percent_diff'] = (df['ox_len'] - df['ig_len']) / nonzero_ox_len * 100

df['percent_diff'].hist(bins=50)
plt.title('Percentage Difference: ig_length under/over ox_length')
plt.xlabel('((ox - ig) / ox) * 100')
plt.ylabel('Frequency')
# Reference line where both lengths agree exactly.
plt.axvline(0, color='black', linestyle='dashed', label='Exact Match')
plt.legend()
plt.savefig(DATA_DIR / 'ox_vs_ig_percent_diff_hist.png')
plt.show()

In [None]:
# OSM IDs in more than 1 row: count the distinct ids that occur at least twice.
duplicate_osm_ids = int(df['osm_id'].value_counts().gt(1).sum())
print(duplicate_osm_ids)

# Preview of the comparison table. It must be the cell's last expression to be rendered;
# as the first statement of the cell it produced no output.
df.head()