## Speed comparison

In [1]:
import os
import sys
from pathlib import Path

import geopandas as gpd
import osmnx as ox
import pandas as pd
import yaml
from ohsome import OhsomeClient
from shapely import wkt
from shapely.geometry import LineString, MultiLineString

# Snapshot timestamp used for both the Overpass (OSMnx) and the ohsome queries,
# so the two sources are compared on the same OSM state.
# NOTE: the name matches the stdlib `datetime` module; it is kept because the
# query cells below reference it.
datetime = '2024-03-15T00:00:00Z'

# OSMnx: disable the response cache so each timed run performs the download,
# log to the console, and pin Overpass queries to the snapshot timestamp.
ox.settings.use_cache = False
ox.settings.log_console = True
ox.settings.overpass_settings = f'[out:json][date: "{datetime}"]'

data_dir = Path('./data')
ca_lulc_dir = Path('../lulc-utility')
out_dir = Path('./outdir')
# The cache-format benchmarks write their test files into out_dir; create it
# up front so those cells do not fail when the directory does not exist yet.
out_dir.mkdir(parents=True, exist_ok=True)

In [2]:
# Area of interest: load the boundary file and serialise it with `to_json()`;
# the resulting string is passed as `bpolys` to the ohsome requests.
aoi_gdf = gpd.read_file(data_dir.joinpath('MA-HD-RNK.geojson'))
aoi_bpoly = aoi_gdf.to_json()

## Comparing road network queries

### Using OSMnx

In [3]:
%%timeit -r3

# Benchmark: build the unsimplified graph for the AOI polygon with all
# network types (retain_all=True) and convert its edges to a GeoDataFrame.
# Names assigned inside a %%timeit cell are not available to later cells.
graph = ox.graph_from_polygon(aoi_gdf.unary_union, network_type='all', simplify=False, retain_all=True)
response_gdf = ox.graph_to_gdfs(graph, nodes=False, fill_edge_geometry=True)

1min 26s ± 525 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


### Using Ohsome API

In [4]:
# ohsome client constructed with no arguments; used by the timed query in the next cell.
client = OhsomeClient()

In [5]:
%%timeit -r3

responses = []
response = client.elements.geometry.post(
    bpolys = aoi_bpoly, 
    filter = 'highway=* and type:way and geometry:line',
    time = datetime,
    properties = 'tags'
)
road_network_gdf = response.as_dataframe()

15.9 s ± 166 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


## Cache formats

Query full road network using Ohsome API

In [7]:
# Fetch the road network once outside of %%timeit so that `road_network_gdf`
# is available to the preprocessing and cache-format cells that follow.
client = OhsomeClient()

try:
    response = client.elements.geometry.post(
        bpolys=aoi_bpoly,
        filter='highway=* and type:way and geometry:line',
        time=datetime,
        properties='tags',
    )
    road_network_gdf = response.as_dataframe()
except Exception as err:
    print(f'Could not send request to ohsome API: {err}')
    # Re-raise rather than calling sys.exit(): execution still stops here, but
    # the original exception and its traceback stay visible in the cell output.
    raise

In [8]:
# Number of features returned by the ohsome query.
road_network_gdf.shape[0]

141586

Preprocess

In [10]:
# Keep only line-type features. `geom_type` is evaluated on the whole column,
# replacing the per-row `isinstance` lambda that was passed to `.apply()`.
line_mask = road_network_gdf['geometry'].geom_type.isin(['LineString', 'MultiLineString'])
preprocessed_road_network = road_network_gdf[line_mask].copy()

# The asset file provides the tag columns to retain and the accepted
# `highway` values.
with open(data_dir / 'road_network_asset.yaml', 'r') as src:
    asset = yaml.safe_load(src)
tags_list = asset['tags']
road_types_list = asset['road_types']

# Select only those configured columns that are present in the response.
tags = [tag.strip() for tag in tags_list]
tags_to_keep = [col for col in tags if col in preprocessed_road_network.columns]
# NOTE(review): the steps below rely on 'highway' (and, for the later geometry
# operations, 'geometry') being part of the asset's tag list — confirm.
preprocessed_road_network = preprocessed_road_network[tags_to_keep]

# Restrict to the configured road types.
preprocessed_road_network = preprocessed_road_network[preprocessed_road_network['highway'].isin(road_types_list)]

len(preprocessed_road_network)

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)


134754

In [11]:
# Reproject to EPSG:25832 (ETRS89 / UTM zone 32N, metre units) and report the
# total network length in kilometres. The result is rebound to the name rather
# than reprojected with inplace=True, so no existing object is mutated.
preprocessed_road_network = preprocessed_road_network.to_crs(epsg=25832)
round(preprocessed_road_network.geometry.length.sum() / 1000, 2)

17606.58

### Test csv format

In [None]:
# Target path for the CSV round-trip test.
file = out_dir.joinpath('preprocessed_road_network_test.csv')

In [None]:
%%timeit -r10

# Benchmark: write the preprocessed network to CSV without the index column.
preprocessed_road_network.to_csv(file, index=False)

2.06 s ± 64.8 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


In [None]:
%%timeit -r10

# Benchmark: read the CSV back and rebuild geometry objects from the text in
# the 'geometry' column via `wkt.loads`. The columns at positions 2 and 8 are
# read as str — presumably to avoid mixed-type inference; TODO confirm which
# tag columns these are. Requires `pd` and `wkt` from the import cell.
road_network = pd.read_csv(file, dtype={2: str, 8: str})
road_network['geometry'] = road_network['geometry'].astype(object).apply(wkt.loads)

1.11 s ± 67.3 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


In [None]:
# Size of the CSV file on disk, in KiB.
round(file.stat().st_size / 1024, 2)

35532.19

In [None]:
# Delete the CSV test file.
file.unlink()

### Test geojson format

In [None]:
# Target path for the GeoJSON round-trip test.
file = out_dir.joinpath('preprocessed_road_network_test.geojson')

In [None]:
%%timeit -r5 # takes too long for more runs

# Benchmark: write the preprocessed network with the GeoJSON driver.
preprocessed_road_network.to_file(file, driver='GeoJSON')

34.9 s ± 1.34 s per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [None]:
%%timeit -r5 # takes too long for more runs

# Benchmark: read the GeoJSON file back with geopandas.
road_network = gpd.read_file(file)

  as_dt = pd.to_datetime(df[k], errors="ignore")
  as_dt = pd.to_datetime(df[k], errors="ignore")
  as_dt = pd.to_datetime(df[k], errors="ignore")
  as_dt = pd.to_datetime(df[k], errors="ignore")
  as_dt = pd.to_datetime(df[k], errors="ignore")


15.7 s ± 246 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


  as_dt = pd.to_datetime(df[k], errors="ignore")


In [None]:
# Size of the GeoJSON file on disk, in KiB.
round(file.stat().st_size / 1024, 2)

107316.39

In [None]:
# Delete the GeoJSON test file.
file.unlink()

### Test feather format

In [None]:
# Target path for the Feather round-trip test.
file = out_dir.joinpath('preprocessed_road_network_test.feather')

In [None]:
%%timeit -r25

# Benchmark: write the preprocessed network in Feather format.
preprocessed_road_network.to_feather(file)

241 ms ± 11.4 ms per loop (mean ± std. dev. of 25 runs, 1 loop each)


In [None]:
%%timeit -r25

# Benchmark: read the Feather file back with geopandas.
road_network = gpd.read_feather(file)

185 ms ± 8.78 ms per loop (mean ± std. dev. of 25 runs, 10 loops each)


In [None]:
# Size of the Feather file on disk, in KiB.
round(file.stat().st_size / 1024, 2)

16166.79

In [None]:
# Delete the Feather test file.
file.unlink()

### Test parquet format

In [None]:
# Target path for the Parquet round-trip test.
file = out_dir.joinpath('preprocessed_road_network_test.parquet')

In [None]:
%%timeit -r25

# Benchmark: write the preprocessed network in Parquet format.
preprocessed_road_network.to_parquet(file)

265 ms ± 23.5 ms per loop (mean ± std. dev. of 25 runs, 1 loop each)


In [None]:
%%timeit -r25

# Benchmark: read the Parquet file back with geopandas.
road_network = gpd.read_parquet(file)

186 ms ± 16.5 ms per loop (mean ± std. dev. of 25 runs, 1 loop each)


In [None]:
# Size of the Parquet file on disk, in KiB.
round(file.stat().st_size / 1024, 2)

13991.57

In [None]:
# Delete the Parquet test file.
file.unlink()

### Test pickle format

In [None]:
# Target path for the pickle round-trip test.
file = out_dir.joinpath('preprocessed_road_network_test.pkl')

In [None]:
%%timeit -r25

# Benchmark: pickle the preprocessed network.
preprocessed_road_network.to_pickle(file)

340 ms ± 30.1 ms per loop (mean ± std. dev. of 25 runs, 1 loop each)


In [None]:
%%timeit -r25

# Benchmark: unpickle the file written in the previous cell. Requires `pd`
# from the import cell. Unpickling can execute arbitrary code, so this is only
# appropriate for files produced locally, as this one is.
road_network = pd.read_pickle(file)

235 ms ± 27.8 ms per loop (mean ± std. dev. of 25 runs, 1 loop each)


In [None]:
# Size of the pickle file on disk, in KiB.
round(file.stat().st_size / 1024, 2)

34321.42

In [None]:
# Delete the pickle test file.
file.unlink()