From 68b9ce0d17c849d2566a2f2f36b6cea592a4f428 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:10:50 +0100 Subject: [PATCH 01/27] Make building way geometries generic by checking automatically whether geometry should be Polygon or LineString. --- pyrosm/geometry.pxd | 2 +- pyrosm/geometry.pyx | 147 ++++++++++++++++++++++++-------------------- 2 files changed, 81 insertions(+), 68 deletions(-) diff --git a/pyrosm/geometry.pxd b/pyrosm/geometry.pxd index bc9ac40..4280754 100644 --- a/pyrosm/geometry.pxd +++ b/pyrosm/geometry.pxd @@ -1,8 +1,8 @@ cpdef create_point_geometries(xarray, yarray) cdef _create_point_geometries(xarray, yarray) +cdef _create_way_geometries(node_coordinates, way_elements) cpdef create_way_geometries(node_coordinates, way_elements) cdef create_pygeos_polygon_from_relation(node_coordinates, relation_ways, member_roles) -cpdef create_polygon_geometries(node_coordinates, way_elements) cdef create_linear_ring(coordinates) cpdef create_node_coordinates_lookup(nodes) cdef pygeos_to_shapely(geom) diff --git a/pyrosm/geometry.pyx b/pyrosm/geometry.pyx index fa5fd46..f5e1ccc 100644 --- a/pyrosm/geometry.pyx +++ b/pyrosm/geometry.pyx @@ -74,34 +74,6 @@ cdef _create_point_geometries(xarray, yarray): dtype=object)) -cdef _create_way_geometries(node_coordinates, way_elements): - cdef long long node - cdef list coords, way_nodes - cdef int i, ii, nn, n = len(way_elements['id']) - - geometries = [] - for i in range(0, n): - way_nodes = way_elements['nodes'][i] - coords = [] - nn = len(way_nodes) - for ii in range(0, nn): - node = way_nodes[ii] - try: - coords.append((node_coordinates[node][0], - node_coordinates[node][1])) - except: - pass - if len(coords) > 1: - geometries.append(coords) - else: - geometries.append(None) - return to_shapely(np.array( - [linestrings(geom) - if geom is not None else None - for geom in geometries], - dtype=object)) - - cdef create_pygeos_polygon_from_relation(node_coordinates, relation_ways, 
member_roles): cdef int i, m_cnt cdef str role @@ -138,57 +110,98 @@ cdef create_pygeos_polygon_from_relation(node_coordinates, relation_ways, member return polygons(shell, holes) -cdef _create_polygon_geometries(node_coordinates, way_elements): - cdef long long node - cdef list coords - cdef int n = len(way_elements['id']) - cdef int i, ii, nn - geometries = [] +cpdef create_node_coordinates_lookup(nodes): + return _create_node_coordinates_lookup(nodes) - for i in range(0, n): - nodes_ = way_elements['nodes'][i] - coords = [] - nn = len(nodes_) - for ii in range(0, nn): - node = nodes_[ii] - try: - coords.append((node_coordinates[node][0], - node_coordinates[node][1])) - except: - pass +cpdef create_point_geometries(xarray, yarray): + return _create_point_geometries(xarray, yarray) - if len(coords) > 2: - try: - geometries.append(polygons(coords)) - except GEOSException as e: - # Some geometries might not be valid for creating a Polygon - # These might occur e.g. at the edge of the spatial extent - if "Invalid number of points in LinearRing" in str(e): - geometries.append(None) - else: - raise e - except Exception as e: + +cdef create_linestring_geometry(nodes, node_coordinates): + + coords = [] + n = len(nodes) + for i in range(0, n): + node = nodes[i] + try: + coords.append((node_coordinates[node][0], + node_coordinates[node][1])) + except: + pass + + if len(coords) > 1: + try: + return linestrings(coords) + except GEOSException as e: + if "Invalid number of points" in str(e): + return None + else: raise e + except Exception as e: + raise e - else: - geometries.append(None) + else: + return None - return to_shapely(geometries) +cdef create_polygon_geometry(nodes, node_coordinates): + cdef int i, n = len(nodes) + coords = [] + for i in range(0, n): + node = nodes[i] + try: + coords.append((node_coordinates[node][0], + node_coordinates[node][1])) + except: + pass + + if len(coords) > 2: + try: + return polygons(coords) + except GEOSException as e: + # Some 
geometries might not be valid for creating a Polygon + # These might occur e.g. at the edge of the spatial extent + if "Invalid number of points in LinearRing" in str(e): + return None + else: + raise e + except Exception as e: + raise e + else: + return None +cdef _create_way_geometries(node_coordinates, way_elements): + # Info for constructing geometries: + # https://wiki.openstreetmap.org/wiki/Way -cpdef create_node_coordinates_lookup(nodes): - return _create_node_coordinates_lookup(nodes) + cdef long long node + cdef list coords + cdef int n = len(way_elements['id']) + cdef int i + geometries = [] -cpdef create_point_geometries(xarray, yarray): - return _create_point_geometries(xarray, yarray) + for i in range(0, n): + nodes = way_elements['nodes'][i] + coords = [] + # If first and last node are the same, it's a closed way + if nodes[0] == nodes[-1]: + tag_keys = way_elements.keys() + # Create Polygon unless way is of type 'highway' or 'barrier' + if "highway" in tag_keys or "barrier" in tag_keys: + geom = create_linestring_geometry(nodes, node_coordinates) + else: + geom = create_polygon_geometry(nodes, node_coordinates) -cpdef create_way_geometries(node_coordinates, way_elements): - return _create_way_geometries(node_coordinates, way_elements) + # Otherwise create LineString + else: + geom = create_linestring_geometry(nodes, node_coordinates) + geometries.append(geom) -cpdef create_polygon_geometries(node_coordinates, way_elements): - return _create_polygon_geometries(node_coordinates, way_elements, ) + return to_shapely(geometries) + +cpdef create_way_geometries(node_coordinates, way_elements): + return _create_way_geometries(node_coordinates, way_elements) \ No newline at end of file From 84f9644f32b62a38dbd0783de8e9ab293e43813f Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:11:46 +0100 Subject: [PATCH 02/27] Use geodataframe generator. 
--- pyrosm/buildings.py | 33 ++++++--------------------------- pyrosm/networks.py | 13 +++++-------- pyrosm/pois.py | 38 ++++---------------------------------- 3 files changed, 15 insertions(+), 69 deletions(-) diff --git a/pyrosm/buildings.py b/pyrosm/buildings.py index 9a3a3da..696af91 100644 --- a/pyrosm/buildings.py +++ b/pyrosm/buildings.py @@ -1,7 +1,5 @@ from pyrosm.data_manager import get_osm_data -from pyrosm.geometry import create_polygon_geometries -from pyrosm.frames import create_gdf -from pyrosm.relations import prepare_relations +from pyrosm.frames import prepare_geodataframe import geopandas as gpd import warnings @@ -31,32 +29,13 @@ def get_building_data(node_coordinates, way_records, relations, tags_as_columns, ) # If there weren't any data, return empty GeoDataFrame - if ways is None: - warnings.warn("Could not find any buildings for given area.", + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any landuse elements for given area.", UserWarning, stacklevel=2) return gpd.GeoDataFrame() - # Create geometries for normal ways - geometries = create_polygon_geometries(node_coordinates, - ways) - - # Convert to GeoDataFrame - way_gdf = create_gdf(ways, geometries) - way_gdf["osm_type"] = "way" - - # Prepare relation data if it is available - if relations is not None: - relations = prepare_relations(relations, relation_ways, - node_coordinates, - tags_as_columns) - relation_gdf = gpd.GeoDataFrame(relations) - relation_gdf["osm_type"] = "relation" - - gdf = way_gdf.append(relation_gdf, ignore_index=True) - else: - gdf = way_gdf - - gdf = gdf.dropna(subset=['geometry']).reset_index(drop=True) - + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) return gdf diff --git a/pyrosm/networks.py b/pyrosm/networks.py index ed68481..95c4bc0 100644 --- a/pyrosm/networks.py +++ b/pyrosm/networks.py @@ -1,6 +1,5 @@ from pyrosm.data_manager import 
get_osm_data -from pyrosm.frames import create_gdf -from pyrosm.geometry import create_way_geometries +from pyrosm.frames import prepare_geodataframe import geopandas as gpd import warnings @@ -27,11 +26,9 @@ def get_network_data(node_coordinates, way_records, tags_as_columns, network_fil stacklevel=2) return gpd.GeoDataFrame() - geometries = create_way_geometries(node_coordinates, - ways) + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + return gdf - # Convert to GeoDataFrame - gdf = create_gdf(ways, geometries) - gdf = gdf.dropna(subset=['geometry']).reset_index(drop=True) - return gdf diff --git a/pyrosm/pois.py b/pyrosm/pois.py index 545047e..9a47654 100644 --- a/pyrosm/pois.py +++ b/pyrosm/pois.py @@ -1,9 +1,6 @@ from pyrosm.data_manager import get_osm_data -from pyrosm.geometry import create_polygon_geometries -from pyrosm.frames import create_gdf, create_nodes_gdf -from pyrosm.relations import prepare_relations +from pyrosm.frames import prepare_geodataframe import geopandas as gpd -import pandas as pd import warnings @@ -26,35 +23,8 @@ def get_poi_data(nodes, node_coordinates, way_records, relations, tags_as_column stacklevel=2) return gpd.GeoDataFrame() - if nodes is not None: - # Create GeoDataFrame from nodes - node_gdf = create_nodes_gdf(nodes) - node_gdf['osm_type'] = "node" - else: - node_gdf = gpd.GeoDataFrame() + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) - if ways is not None: - # Create geometries for normal ways - geometries = create_polygon_geometries(node_coordinates, - ways) - # Convert to GeoDataFrame - way_gdf = create_gdf(ways, geometries) - node_gdf['osm_type'] = "way" - else: - way_gdf = gpd.GeoDataFrame() - - # Prepare relation data if it is available - if relations is not None: - relations = prepare_relations(relations, relation_ways, - node_coordinates, - tags_as_columns) 
- relation_gdf = gpd.GeoDataFrame(relations) - node_gdf['osm_type'] = "relation" - - else: - relation_gdf = gpd.GeoDataFrame() - - # Merge all - gdf = pd.concat([node_gdf, way_gdf, relation_gdf]) - gdf = gdf.dropna(subset=['geometry']) return gdf From e4ff3d93780962ad895bd796ef4c7a4b2c510132 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:12:10 +0100 Subject: [PATCH 03/27] Add tests --- tests/test_landuse_parsing.py | 38 +++++++++++++++++++++++++++++++++++ tests/test_natural_parsing.py | 38 +++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 tests/test_natural_parsing.py diff --git a/tests/test_landuse_parsing.py b/tests/test_landuse_parsing.py index e69de29..f5b4515 100644 --- a/tests/test_landuse_parsing.py +++ b/tests/test_landuse_parsing.py @@ -0,0 +1,38 @@ +import pytest +from pyrosm import get_path + + +@pytest.fixture +def test_pbf(): + pbf_path = get_path("test_pbf") + return pbf_path + + +def test_parsing_landuse_with_defaults(test_pbf): + from pyrosm import OSM + from pyrosm.landuse import get_landuse_data + from geopandas import GeoDataFrame + import pyproj + from pyrosm._arrays import concatenate_dicts_of_arrays + osm = OSM(filepath=test_pbf) + osm._read_pbf() + tags_as_columns = osm.conf.tags.landuse + + nodes = concatenate_dicts_of_arrays(osm._nodes) + gdf = get_landuse_data(nodes, + osm._node_coordinates, + osm._way_records, + osm._relations, + tags_as_columns, + None) + + assert isinstance(gdf, GeoDataFrame) + + # Required keys + required = ['id', 'geometry'] + for col in required: + assert col in gdf.columns + + # Test shape + assert len(gdf) == 50 + assert gdf.crs == pyproj.CRS.from_epsg(4326) diff --git a/tests/test_natural_parsing.py b/tests/test_natural_parsing.py new file mode 100644 index 0000000..c1ab5d4 --- /dev/null +++ b/tests/test_natural_parsing.py @@ -0,0 +1,38 @@ +import pytest +from pyrosm import get_path + + +@pytest.fixture +def test_pbf(): + pbf_path = 
get_path("test_pbf") + return pbf_path + + +def test_parsing_natural_with_defaults(test_pbf): + from pyrosm import OSM + from pyrosm.natural import get_natural_data + from geopandas import GeoDataFrame + import pyproj + from pyrosm._arrays import concatenate_dicts_of_arrays + osm = OSM(filepath=test_pbf) + osm._read_pbf() + tags_as_columns = osm.conf.tags.natural + + nodes = concatenate_dicts_of_arrays(osm._nodes) + gdf = get_natural_data(nodes, + osm._node_coordinates, + osm._way_records, + osm._relations, + tags_as_columns, + None) + + assert isinstance(gdf, GeoDataFrame) + + # Required keys + required = ['id', 'geometry'] + for col in required: + assert col in gdf.columns + + # Test shape + assert len(gdf) == 14 + assert gdf.crs == pyproj.CRS.from_epsg(4326) From 0606e6453be17db15822e8fb9843472f53098eb5 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:12:42 +0100 Subject: [PATCH 04/27] Fix tests after changing the geometry creation logic. --- tests/test_building_parsing.py | 6 +++--- tests/test_network_parsing.py | 34 ++++++++++++++-------------------- tests/test_poi_parsing.py | 2 +- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/tests/test_building_parsing.py b/tests/test_building_parsing.py index 5d88d65..5b179c3 100644 --- a/tests/test_building_parsing.py +++ b/tests/test_building_parsing.py @@ -46,7 +46,7 @@ def test_parsing_building_elements(test_pbf): def test_creating_building_geometries(test_pbf): from pyrosm import OSM from pyrosm.data_manager import get_osm_data - from pyrosm.geometry import create_polygon_geometries + from pyrosm.geometry import create_way_geometries from numpy import ndarray from shapely.geometry import Polygon @@ -61,8 +61,8 @@ def test_creating_building_geometries(test_pbf): filter_type="keep") assert isinstance(ways, dict) - geometries = create_polygon_geometries(osm._node_coordinates, - ways) + geometries = create_way_geometries(osm._node_coordinates, + ways) assert 
isinstance(geometries, ndarray) assert isinstance(geometries[0], Polygon) assert len(geometries) == len(ways["id"]) diff --git a/tests/test_network_parsing.py b/tests/test_network_parsing.py index 1b6afe7..c40e104 100644 --- a/tests/test_network_parsing.py +++ b/tests/test_network_parsing.py @@ -31,11 +31,11 @@ def test_filter_network_by_walking(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test shape - assert gdf.shape == (238, 17) + assert gdf.shape == (238, 18) required_cols = ['access', 'bridge', 'foot', 'highway', 'lanes', 'lit', 'maxspeed', 'name', 'oneway', 'ref', 'service', 'surface', 'id', - 'geometry', 'tags'] + 'geometry', 'tags', 'osm_type'] for col in required_cols: assert col in gdf.columns @@ -54,10 +54,11 @@ def test_filter_network_by_driving(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test shape - assert gdf.shape == (200, 17) + assert gdf.shape == (200, 18) required_cols = ['access', 'bridge', 'highway', 'int_ref', 'lanes', 'lit', 'maxspeed', - 'name', 'oneway', 'ref', 'service', 'surface', 'id', 'geometry', 'tags'] + 'name', 'oneway', 'ref', 'service', 'surface', 'id', 'geometry', 'tags', + 'osm_type'] for col in required_cols: assert col in gdf.columns @@ -77,11 +78,11 @@ def test_filter_network_by_cycling(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test shape - assert gdf.shape == (290, 19) + assert gdf.shape == (290, 20) required_cols = ['access', 'bicycle', 'bridge', 'foot', 'highway', 'lanes', 'lit', 'maxspeed', 'name', 'oneway', 'ref', 'service', 'surface', 'tunnel', - 'id', 'geometry', 'tags'] + 'id', 'geometry', 'tags', 'osm_type'] for col in required_cols: assert col in gdf.columns @@ -95,7 +96,6 @@ def test_saving_network_to_shapefile(test_pbf, test_output_dir): from pyrosm import OSM import geopandas as gpd import shutil - from pandas.testing import assert_frame_equal if not os.path.exists(test_output_dir): os.makedirs(test_output_dir) @@ -108,15 +108,9 @@ def test_saving_network_to_shapefile(test_pbf, test_output_dir): 
# Ensure it can be read and matches with original one gdf2 = gpd.read_file(temp_path) - # When reading integers they - # might be imported as strings instead of ints which is - # normal, however, the values should be identical - convert_to_ints = ["id", "timestamp", "version"] - for col in convert_to_ints: - gdf[col] = gdf[col].astype(int) - gdf2[col] = gdf2[col].astype(int) - - assert_frame_equal(gdf, gdf2) + cols = gdf.columns + for col in cols: + assert gdf[col].tolist() == gdf2[col].tolist() # Clean up shutil.rmtree(test_output_dir) @@ -136,11 +130,11 @@ def test_parse_network_with_bbox(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test shape - assert gdf.shape == (65, 17) + assert gdf.shape == (65, 18) required_cols = ['access', 'bridge', 'foot', 'highway', 'lanes', 'lit', 'maxspeed', 'name', 'oneway', 'ref', 'service', 'surface', 'id', - 'geometry', 'tags'] + 'geometry', 'tags', 'osm_type'] for col in required_cols: assert col in gdf.columns @@ -168,11 +162,11 @@ def test_parse_network_with_shapely_bbox(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test shape - assert gdf.shape == (65, 17) + assert gdf.shape == (65, 18) required_cols = ['access', 'bridge', 'foot', 'highway', 'lanes', 'lit', 'maxspeed', 'name', 'oneway', 'ref', 'service', 'surface', 'id', - 'geometry', 'tags'] + 'geometry', 'tags', 'osm_type'] for col in required_cols: assert col in gdf.columns diff --git a/tests/test_poi_parsing.py b/tests/test_poi_parsing.py index 9330818..fc556b0 100644 --- a/tests/test_poi_parsing.py +++ b/tests/test_poi_parsing.py @@ -59,5 +59,5 @@ def test_parsing_pois_with_defaults(helsinki_pbf, default_filter): assert col in gdf.columns # Test shape - assert len(gdf) == 1777 + assert len(gdf) == 1780 assert gdf.crs == pyproj.CRS.from_epsg(4326) From 58babfdc60a853cc867fa23aee4d04935b0db4e0 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:13:14 +0100 Subject: [PATCH 05/27] Create a generic method for building geodataframes. 
--- pyrosm/frames.pxd | 6 ++++- pyrosm/frames.pyx | 57 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/pyrosm/frames.pxd b/pyrosm/frames.pxd index 46d6df4..6d641c1 100644 --- a/pyrosm/frames.pxd +++ b/pyrosm/frames.pxd @@ -1,2 +1,6 @@ cpdef create_nodes_gdf(node_dict_list) -cpdef create_gdf(data_records, geometry_array) \ No newline at end of file +cpdef create_gdf(data_records, geometry_array) +cpdef prepare_way_gdf(node_coordinates, ways) +cpdef prepare_node_gdf(nodes) +cpdef prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) \ No newline at end of file diff --git a/pyrosm/frames.pyx b/pyrosm/frames.pyx index 6346222..97e53a1 100644 --- a/pyrosm/frames.pyx +++ b/pyrosm/frames.pyx @@ -2,9 +2,11 @@ import pandas as pd import geopandas as gpd from pyrosm._arrays cimport concatenate_dicts_of_arrays from pyrosm.geometry cimport _create_point_geometries - +from pyrosm.geometry cimport create_way_geometries +from pyrosm.relations import prepare_relations cpdef create_nodes_gdf(nodes): + cdef str k if isinstance(nodes, list): nodes = concatenate_dicts_of_arrays(nodes) df = pd.DataFrame() @@ -13,10 +15,9 @@ cpdef create_nodes_gdf(nodes): df['geometry'] = _create_point_geometries(nodes['lon'], nodes['lat']) return gpd.GeoDataFrame(df, crs='epsg:4326') - cpdef create_gdf(data_arrays, geometry_array): + cdef str key df = pd.DataFrame() - for key, data in data_arrays.items(): # When inserting nodes, # those should be converted @@ -28,3 +29,53 @@ cpdef create_gdf(data_arrays, geometry_array): df['geometry'] = geometry_array return gpd.GeoDataFrame(df, crs='epsg:4326') + +cpdef prepare_way_gdf(node_coordinates, ways): + if ways is not None: + geometries = create_way_geometries(node_coordinates, + ways) + # Convert to GeoDataFrame + way_gdf = create_gdf(ways, geometries) + way_gdf['osm_type'] = "way" + else: + way_gdf = gpd.GeoDataFrame() + return way_gdf + +cpdef 
prepare_node_gdf(nodes): + if nodes is not None: + # Create GeoDataFrame from nodes + node_gdf = create_nodes_gdf(nodes) + node_gdf['osm_type'] = "node" + else: + node_gdf = gpd.GeoDataFrame() + return node_gdf + +cpdef prepare_relation_gdf(node_coordinates, relations, relation_ways, tags_as_columns): + if relations is not None: + relations = prepare_relations(relations, relation_ways, + node_coordinates, + tags_as_columns) + + relation_gdf = gpd.GeoDataFrame(relations) + relation_gdf['osm_type'] = "relation" + + else: + relation_gdf = gpd.GeoDataFrame() + return relation_gdf + +cpdef prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, + tags_as_columns): + # Prepare nodes + node_gdf = prepare_node_gdf(nodes) + + # Prepare ways + way_gdf = prepare_way_gdf(node_coordinates, ways) + + # Prepare relation data + relation_gdf = prepare_relation_gdf(node_coordinates, relations, relation_ways, tags_as_columns) + + # Merge all + gdf = pd.concat([node_gdf, way_gdf, relation_gdf]) + gdf = gdf.dropna(subset=['geometry']).reset_index(drop=True) + return gdf From f7724b222ae0802c35076bd69e718f399bfbc938 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:13:52 +0100 Subject: [PATCH 06/27] Add landuse and natural --- README.md | 9 ++-- pyrosm/pyrosm.py | 112 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 111 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index fcd5ef2..8c2165a 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,12 @@ [![PyPI version](https://badge.fury.io/py/pyrosm.svg)](https://badge.fury.io/py/pyrosm)[![build status](https://api.travis-ci.org/HTenkanen/pyrosm.svg?branch=master)](https://travis-ci.org/HTenkanen/pyrosm)[![Coverage Status](https://codecov.io/gh/HTenkanen/pyrosm/branch/master/graph/badge.svg)](https://codecov.io/gh/HTenkanen/pyrosm) **Pyrosm** is a Python library for reading OpenStreetMap from `protobuf` files (`*.osm.pbf`) into Geopandas GeoDataFrames. 
-Pyrosm makes it easy to extract various datasets from OpenStreetMap pbf-dumps including e.g. road networks and buildings (points of interest in progress). +Pyrosm makes it easy to extract various datasets from OpenStreetMap pbf-dumps including e.g. road networks, buildings, +Points of Interest (POI) and landuse. Also fully customized queries are supported which makes it possible to parse the data +from OSM with more specific filters. -**Pyrosm** is easy to use and it provides a somewhat similar user interface as another popular Python library [OSMnx](https://github.com/gboeing/osmnx) -for parsing different datasets from the OpenStreetMap pbf-dump including road networks, buildings and Points of Interest (later also landuse and possibility to make customized calls). +**Pyrosm** is easy to use and it provides a somewhat similar user interface as [OSMnx](https://github.com/gboeing/osmnx). The main difference between pyrosm and OSMnx is that OSMnx reads the data over internet using OverPass API, whereas pyrosm reads the data from local OSM data dumps that can be downloaded e.g. from [GeoFabrik's website](http://download.geofabrik.de/). This makes it possible to read data much faster thus allowing e.g. parsing street networks for whole country in a matter of minutes instead of hours (however, see [caveats](#caveats)). @@ -26,7 +27,7 @@ which is also used by OpenStreetMap contributors to distribute the OSM data in P - read buildings from PBF - read Points of Interest (POI) from PBF - filter data based on bounding box - - apply custom filter to filter data + - apply custom criteria with buildings and POIs to filter the data - e.g. 
keeping only specific type of buildings can be done by applying a filter: `{'building': ['residential', 'retail']}` diff --git a/pyrosm/pyrosm.py b/pyrosm/pyrosm.py index 0246198..4bc7bc6 100644 --- a/pyrosm/pyrosm.py +++ b/pyrosm/pyrosm.py @@ -1,13 +1,16 @@ from pyrosm.config import Conf from pyrosm.pbfreader import parse_osm_data -from pyrosm.networks import get_network_data -from pyrosm.buildings import get_building_data from pyrosm._arrays import concatenate_dicts_of_arrays -from pyrosm.pois import get_poi_data from pyrosm.geometry import create_node_coordinates_lookup from pyrosm.frames import create_nodes_gdf from shapely.geometry import Polygon, MultiPolygon +from pyrosm.buildings import get_building_data +from pyrosm.landuse import get_landuse_data +from pyrosm.natural import get_natural_data +from pyrosm.networks import get_network_data +from pyrosm.pois import get_poi_data + class OSM: from pyrosm.utils._compat import PYGEOS_SHAPELY_COMPAT @@ -141,6 +144,13 @@ def get_buildings(self, custom_filter=None): To keep only specific buildings such as 'residential' and 'retail', you can apply a custom filter which is a Python dictionary with following format: `custom_filter={'building': ['residential', 'retail']}` + + Further info + ------------ + + See OSM documentation for details about the data: + https://wiki.openstreetmap.org/wiki/Key:building + """ # Default tags to keep as columns tags_as_columns = self.conf.tags.building @@ -161,6 +171,99 @@ def get_buildings(self, custom_filter=None): gdf = gdf.drop("nodes", axis=1) return gdf + def get_landuse(self, custom_filter=None): + """ + Parses landuse from OSM. + + Parameters + ---------- + + custom_filter : dict + What kind of landuse to parse, see details below. + + You can opt-in specific elements by using 'custom_filter'. 
+ To keep only specific landuse such as 'construction' and 'industrial', you can apply + a custom filter which is a Python dictionary with following format: + `custom_filter={'landuse': ['construction', 'industrial']}` + + Further info + ------------ + + See OSM documentation for details about the data: + https://wiki.openstreetmap.org/wiki/Key:landuse + """ + + if self._nodes is None or self._way_records is None: + self._read_pbf() + + # Default tags to keep as columns + tags_as_columns = self.conf.tags.landuse + + # If nodes are still in chunks, merge before passing forward + if isinstance(self._nodes, list): + self._nodes = concatenate_dicts_of_arrays(self._nodes) + + gdf = get_landuse_data(self._nodes, + self._node_coordinates, + self._way_records, + self._relations, + tags_as_columns, + custom_filter) + + # Do not keep node information unless specifically asked for + # (they are in a list, and can cause issues when saving the files) + if not self.keep_node_info: + if "nodes" in gdf.columns: + gdf = gdf.drop("nodes", axis=1) + return gdf + + + def get_natural(self, custom_filter=None): + """ + Parses natural from OSM. + + Parameters + ---------- + + custom_filter : dict + What kind of natural to parse, see details below. + + You can opt-in specific elements by using 'custom_filter'. 
+ To keep only specific natural such as 'wood' and 'tree', you can apply + a custom filter which is a Python dictionary with following format: + `custom_filter={'natural': ['wood', 'tree']}` + + Further info + ------------ + + See OSM documentation for details about the data: + https://wiki.openstreetmap.org/wiki/Key:natural + """ + + if self._nodes is None or self._way_records is None: + self._read_pbf() + + # Default tags to keep as columns + tags_as_columns = self.conf.tags.natural + + # If nodes are still in chunks, merge before passing forward + if isinstance(self._nodes, list): + self._nodes = concatenate_dicts_of_arrays(self._nodes) + + gdf = get_natural_data(self._nodes, + self._node_coordinates, + self._way_records, + self._relations, + tags_as_columns, + custom_filter) + + # Do not keep node information unless specifically asked for + # (they are in a list, and can cause issues when saving the files) + if not self.keep_node_info: + if "nodes" in gdf.columns: + gdf = gdf.drop("nodes", axis=1) + return gdf + def get_pois(self, custom_filter=None): """ Parse Point of Interest (POI) from OSM. 
@@ -262,9 +365,6 @@ def get_pois(self, custom_filter=None): gdf = gdf.drop("nodes", axis=1) return gdf - def get_landuse(self, custom_filter=None): - raise NotImplementedError() - def __getattribute__(self, name): # If node-gdf is requested convert to gdf before returning if name == "_nodes_gdf": From ddbc4349c21614981528825d9f7d053636f569e4 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:14:02 +0100 Subject: [PATCH 07/27] Add landuse --- pyrosm/landuse.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 pyrosm/landuse.py diff --git a/pyrosm/landuse.py b/pyrosm/landuse.py new file mode 100644 index 0000000..46083c2 --- /dev/null +++ b/pyrosm/landuse.py @@ -0,0 +1,41 @@ +from pyrosm.data_manager import get_osm_data +from pyrosm.frames import prepare_geodataframe +import geopandas as gpd +import warnings + + +def get_landuse_data(nodes, node_coordinates, way_records, relations, tags_as_columns, custom_filter): + # If custom_filter has not been defined, initialize with default + if custom_filter is None: + custom_filter = {"landuse": True} + else: + # Check that the custom filter is in correct format + if not isinstance(custom_filter, dict): + raise ValueError(f"'custom_filter' should be a Python dictionary. 
" + f"Got {custom_filter} with type {type(custom_filter)}.") + + # Ensure that the "landuse" tag exists + if "landuse" not in custom_filter.keys(): + custom_filter["landuse"] = True + + # Call signature for fetching buildings + nodes, ways, relation_ways, relations = get_osm_data(node_arrays=nodes, + way_records=way_records, + relations=relations, + tags_as_columns=tags_as_columns, + data_filter=custom_filter, + filter_type="keep", + osm_keys=None + ) + + # If there weren't any data, return empty GeoDataFrame + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any landuse elements for given area.", + UserWarning, + stacklevel=2) + return gpd.GeoDataFrame() + + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + return gdf From 09d7659f8eea05b7f184b2f17ee83909d5c96e03 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:14:15 +0100 Subject: [PATCH 08/27] Add natural --- pyrosm/natural.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 pyrosm/natural.py diff --git a/pyrosm/natural.py b/pyrosm/natural.py new file mode 100644 index 0000000..925279b --- /dev/null +++ b/pyrosm/natural.py @@ -0,0 +1,41 @@ +from pyrosm.data_manager import get_osm_data +from pyrosm.frames import prepare_geodataframe +import geopandas as gpd +import warnings + + +def get_natural_data(nodes, node_coordinates, way_records, relations, tags_as_columns, custom_filter): + # If custom_filter has not been defined, initialize with default + if custom_filter is None: + custom_filter = {"natural": True} + else: + # Check that the custom filter is in correct format + if not isinstance(custom_filter, dict): + raise ValueError(f"'custom_filter' should be a Python dictionary. 
" + f"Got {custom_filter} with type {type(custom_filter)}.") + + # Ensure that the "landuse" tag exists + if "natural" not in custom_filter.keys(): + custom_filter["natural"] = True + + # Call signature for fetching buildings + nodes, ways, relation_ways, relations = get_osm_data(node_arrays=nodes, + way_records=way_records, + relations=relations, + tags_as_columns=tags_as_columns, + data_filter=custom_filter, + filter_type="keep", + osm_keys=None + ) + + # If there weren't any data, return empty GeoDataFrame + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any natural elements for given area.", + UserWarning, + stacklevel=2) + return gpd.GeoDataFrame() + + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + return gdf From a6f27b02f2da4b1605b5822c6e81a35f1c5237b8 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 12:16:31 +0100 Subject: [PATCH 09/27] Update docs --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8c2165a..1a503e8 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,8 @@ **Pyrosm** is a Python library for reading OpenStreetMap from `protobuf` files (`*.osm.pbf`) into Geopandas GeoDataFrames. Pyrosm makes it easy to extract various datasets from OpenStreetMap pbf-dumps including e.g. road networks, buildings, -Points of Interest (POI) and landuse. Also fully customized queries are supported which makes it possible to parse the data -from OSM with more specific filters. +Points of Interest (POI), landuse and natural elements. Also fully customized queries are supported which makes it possible +to parse the data from OSM with more specific filters. **Pyrosm** is easy to use and it provides a somewhat similar user interface as [OSMnx](https://github.com/gboeing/osmnx). 
@@ -26,6 +26,8 @@ which is also used by OpenStreetMap contributors to distribute the OSM data in P - read street networks (separately for driving, cycling, walking and all-combined) - read buildings from PBF - read Points of Interest (POI) from PBF + - read landuse from PBF + - read "natural" from PBF - filter data based on bounding box - apply custom criteria with buildings and POIs to filter the data - e.g. keeping only specific type of buildings can be done by applying a filter: `{'building': ['residential', 'retail']}` @@ -33,7 +35,6 @@ which is also used by OpenStreetMap contributors to distribute the OSM data in P ## Roadmap - - add parsing of landuse - add possibility to crop PBF and save a subset into new PBF. - add more tests From a0b39ee205cb5d561ea59dc4ed28ef9fcc152e9c Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 16:39:20 +0100 Subject: [PATCH 10/27] Add tests for custom filter. --- tests/test_custom_filter.py | 391 ++++++++++++++++++++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 tests/test_custom_filter.py diff --git a/tests/test_custom_filter.py b/tests/test_custom_filter.py new file mode 100644 index 0000000..674eb7d --- /dev/null +++ b/tests/test_custom_filter.py @@ -0,0 +1,391 @@ +import pytest +from pyrosm import get_path + + +@pytest.fixture +def test_pbf(): + pbf_path = get_path("test_pbf") + return pbf_path + + +@pytest.fixture +def helsinki_pbf(): + pbf_path = get_path("helsinki_pbf") + return pbf_path + + +@pytest.fixture +def default_filter(): + return {"amenity": True, + "craft": True, + "historic": True, + "leisure": True, + "shop": True, + "tourism": True + } + + +@pytest.fixture +def test_output_dir(): + import os, tempfile + return os.path.join(tempfile.gettempdir(), "pyrosm_test_results") + + +def test_parsing_osm_with_custom_filter_by_excluding_tags(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + import pyproj + osm = OSM(filepath=test_pbf) + + # Keep only 
building as column + tags_as_columns = ["building"] + # Get all buildings except "residential" + custom_filter = {"building": ["residential"]} + filter_type = "exclude" + osm_type = "building" + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + filter_type=filter_type, + osm_keys_to_keep=osm_type, + tags_as_columns=tags_as_columns + ) + + assert isinstance(gdf, GeoDataFrame) + + # Only following columns should exist after specifying tags_as_columns + allowed_columns = ["geometry", "tags", "building", "id", "osm_type", + "version", "timestamp", "changeset"] + for col in gdf.columns: + assert col in allowed_columns + + # Building columns should not have any "residential" tags + assert "residential" not in gdf["building"].tolist() + + # Required keys + required = ['id', 'geometry'] + for col in required: + assert col in gdf.columns + + # Test shape + assert len(gdf) == 1049 + assert gdf.crs == pyproj.CRS.from_epsg(4326) + + +def test_parsing_osm_with_custom_filter_by_including_tags(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + import pyproj + osm = OSM(filepath=test_pbf) + + # Keep only building as column + tags_as_columns = ["building"] + # Get all buildings that are "retail" + custom_filter = {"building": ["retail"]} + filter_type = "keep" + osm_type = "building" + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + filter_type=filter_type, + osm_keys_to_keep=osm_type, + tags_as_columns=tags_as_columns + ) + + assert isinstance(gdf, GeoDataFrame) + + # Only following columns should exist after specifying tags_as_columns + allowed_columns = ["geometry", "tags", "building", "id", "osm_type", + "version", "timestamp", "changeset"] + for col in gdf.columns: + assert col in allowed_columns + + # Building column should contain only the "retail" value + assert len(gdf["building"].unique()) == 1 + assert gdf["building"].unique()[0] == "retail" + + # Required keys + required = ['id', 'geometry'] + for col in
required: + assert col in gdf.columns + + # Test shape + assert len(gdf) == 2 + assert gdf.crs == pyproj.CRS.from_epsg(4326) + + +def test_using_incorrect_filter(test_pbf): + from pyrosm import OSM + osm = OSM(filepath=test_pbf) + + # Test that passing incorrect data works as should + # 1. + custom_filter = None + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter) + except ValueError as e: + if "should be a Python dictionary" in str(e): + pass + else: + raise e + + custom_filter = {"building": [1]} + # 2. + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter) + except ValueError as e: + if "string" in str(e): + pass + else: + raise e + + custom_filter = {"building": ["correct_string", 1]} + # 3. + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter) + except ValueError as e: + if "string" in str(e): + pass + else: + raise e + # 4. + custom_filter = {0: ["residential"]} + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter) + except ValueError as e: + if "string" in str(e): + pass + else: + raise e + + +def test_using_incorrect_tags(test_pbf): + from pyrosm import OSM + osm = OSM(filepath=test_pbf) + + # Incorrect tags + # -------------- + tags_as_columns = [1] + custom_filter = {"building": ["retail"]} + # Test that passing incorrect data works as should + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + tags_as_columns=tags_as_columns + ) + except ValueError as e: + if "All tags listed in 'tags_as_columns' should be strings" in str(e): + pass + else: + raise e + + +def test_using_incorrect_filter_type(test_pbf): + from pyrosm import OSM + osm = OSM(filepath=test_pbf) + + custom_filter = {"building": ["retail"]} + filter_type = "incorrect_test" + # Test that passing incorrect data works as should + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + filter_type=filter_type + ) + except ValueError as e: + if "should be either 'keep' or 
'exclude'" in str(e): + pass + else: + raise e + + +def test_using_incorrect_booleans(test_pbf): + from pyrosm import OSM + osm = OSM(filepath=test_pbf) + + custom_filter = {"building": ["retail"]} + incorrect_bool = "foo" + # Test that passing incorrect data works as should + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + keep_nodes=incorrect_bool + ) + except ValueError as e: + if "'keep_nodes' should be boolean type: True or False" in str(e): + pass + else: + raise e + + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + keep_ways=incorrect_bool + ) + except ValueError as e: + if "'keep_ways' should be boolean type: True or False" in str(e): + pass + else: + raise e + + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + keep_relations=incorrect_bool + ) + except ValueError as e: + if "'keep_relations' should be boolean type: True or False" in str(e): + pass + else: + raise e + + +def test_using_incorrect_osm_keys(test_pbf): + from pyrosm import OSM + osm = OSM(filepath=test_pbf) + + osm_keys = 1 + custom_filter = {"building": ["retail"]} + # Test that passing incorrect data works as should + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + osm_keys_to_keep=osm_keys + ) + except ValueError as e: + if "'osm_keys_to_keep' -parameter should be of type str or list." 
in str(e): + pass + else: + raise e + + +def test_reading_with_custom_filters_with_including(test_pbf): + from pyrosm import OSM + from shapely.geometry import Polygon + from geopandas import GeoDataFrame + + # Get first all data + osm = OSM(filepath=test_pbf) + gdf_all = osm.get_buildings() + + # Find out all 'building' tags + cnts = gdf_all['building'].value_counts() + for filter_, cnt in cnts.items(): + # Use the custom filter + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': [filter_]}, + filter_type="keep") + + assert isinstance(filtered, GeoDataFrame) + assert isinstance(filtered.loc[0, "geometry"], Polygon) + assert len(filtered) == cnt + # Now should only have buildings with given key + assert len(filtered["building"].unique()) == 1 + + required_cols = ['building', 'id', 'timestamp', 'version', 'geometry'] + + for col in required_cols: + assert col in filtered.columns + + +def test_reading_with_custom_filters_with_excluding(test_pbf): + from pyrosm import OSM + from shapely.geometry import Polygon + from geopandas import GeoDataFrame + + # Get first all data + osm = OSM(filepath=test_pbf) + gdf_all = osm.get_buildings() + + # Find out all 'building' tags + cnts = gdf_all['building'].value_counts() + n = len(gdf_all) + for filter_, cnt in cnts.items(): + # Use the custom filter + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': [filter_]}, + filter_type="exclude") + + assert isinstance(filtered, GeoDataFrame) + assert isinstance(filtered.loc[0, "geometry"], Polygon) + assert len(filtered) == n - cnt + # Now should not have the filter_ in buildings + assert filter_ not in filtered["building"].unique() + + required_cols = ['building', 'id', 'timestamp', 'version', 'geometry'] + + for col in required_cols: + assert col in filtered.columns + + +def test_reading_with_custom_filters_selecting_specific_osm_element(helsinki_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + # Get first all data + osm = 
OSM(filepath=helsinki_pbf) + + # Test getting only relations + # --------------------------- + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': True}, + filter_type="keep", + keep_nodes=False, + keep_ways=False, + keep_relations=True) + assert isinstance(filtered, GeoDataFrame) + + # Now should only have 'relation' osm_type + assert len(filtered['osm_type'].unique()) == 1 + assert filtered['osm_type'].unique()[0] == 'relation' + assert len(filtered) == 64 + + # Test getting only ways + # --------------------------- + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': True}, + filter_type="keep", + keep_nodes=False, + keep_ways=True, + keep_relations=False) + assert isinstance(filtered, GeoDataFrame) + + # Now should only have 'way' osm_type + assert len(filtered['osm_type'].unique()) == 1 + assert filtered['osm_type'].unique()[0] == 'way' + assert len(filtered) == 422 + + # Test getting only nodes + # --------------------------- + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': True}, + filter_type="keep", + keep_nodes=True, + keep_ways=False, + keep_relations=False) + assert isinstance(filtered, GeoDataFrame) + + # Now should only have 'node' osm_type + assert len(filtered['osm_type'].unique()) == 1 + assert filtered['osm_type'].unique()[0] == 'node' + assert len(filtered) == 36 + + +def test_custom_filters_with_custom_keys(helsinki_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + # Get first all data + osm = OSM(filepath=helsinki_pbf) + + # Test reading public transport related data + filtered = osm.get_osm_by_custom_criteria(custom_filter={'public_transport': True}, + filter_type="keep", + ) + assert isinstance(filtered, GeoDataFrame) + assert len(filtered) == 112 + + # Test a more complicated query + # ----------------------------- + + # Test reading all transit related data (bus, trains, trams, metro etc.) + # Exclude nodes (not keeping stops, etc.) 
+ routes = ["bus", "ferry", "railway", "subway", "train", "tram", "trolleybus"] + rails = ["tramway", "light_rail", "rail", "subway", "tram"] + bus = ['yes'] + + transit = osm.get_osm_by_custom_criteria(custom_filter={ + 'route': routes, + 'railway': rails, + 'bus': bus, + 'public_transport': True}, + filter_type="keep", + keep_nodes=False) + + assert isinstance(transit, GeoDataFrame) + assert len(transit) == 374 From 8fef43ba14d923ba2e78daca941c5cb233bec2b8 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 16:39:44 +0100 Subject: [PATCH 11/27] Include basic info for public_transport as well --- pyrosm/config/default_tags.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyrosm/config/default_tags.py b/pyrosm/config/default_tags.py index 2b37e98..ad36197 100644 --- a/pyrosm/config/default_tags.py +++ b/pyrosm/config/default_tags.py @@ -856,7 +856,8 @@ # PUBLIC_TRANSPORT TAGS # ======================== # See: https://wiki.openstreetmap.org/wiki/Key%3Apublic_transport -public_transport_columns = ["stop_position", +public_transport_columns = basic_info_tags + \ + ["stop_position", "platform", "station", "stop_area", From 29e15407f84e11ce08e67e9876b703b0b13ee406 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 16:40:13 +0100 Subject: [PATCH 12/27] Add utility functions for validating user input --- pyrosm/utils/__init__.py | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/pyrosm/utils/__init__.py b/pyrosm/utils/__init__.py index e69de29..8b788fe 100644 --- a/pyrosm/utils/__init__.py +++ b/pyrosm/utils/__init__.py @@ -0,0 +1,53 @@ +def validate_custom_filter(custom_filter): + # Check that the custom filter is in correct format + if not isinstance(custom_filter, dict): + raise ValueError(f"'custom_filter' should be a Python dictionary. 
" + f"Got {custom_filter} with type {type(custom_filter)}.") + + for k, v in custom_filter.items(): + if not isinstance(k, str): + raise ValueError(f"OSM key in 'custom_filter' should be string. " + f"Got {k} of type {type(k)}") + if v is True: + continue + + if not isinstance(v, list): + raise ValueError(f"OSM tags in 'custom_filter' should be inside a list. " + f"Got {v} of type {type(v)}") + + for item in v: + if not isinstance(item, str): + raise ValueError(f"OSM tag (value) in 'custom_filter' should be string. " + f"Got {item} of type {type(item)}") + + +def validate_osm_keys(osm_keys): + if osm_keys is not None: + if type(osm_keys) not in [str, list]: + raise ValueError(f"'osm_keys_to_keep' -parameter should be of type str or list. " + f"Got {osm_keys} of type {type(osm_keys)}.") + + +def validate_tags_as_columns(tags_as_columns): + if not isinstance(tags_as_columns, list): + raise ValueError(f"'tags_as_columns' should be a list. " + f"Got {tags_as_columns} of type {type(tags_as_columns)}.") + for col in tags_as_columns: + if not isinstance(col, str): + raise ValueError(f"All tags listed in 'tags_as_columns' should be strings. 
" + f"Got {col} of type {type(col)}.") + + +def validate_booleans(keep_nodes, keep_ways, keep_relations): + if not isinstance(keep_nodes, bool): + raise ValueError("'keep_nodes' should be boolean type: True or False") + + if not isinstance(keep_ways, bool): + raise ValueError("'keep_ways' should be boolean type: True or False") + + if not isinstance(keep_relations, bool): + raise ValueError("'keep_relations' should be boolean type: True or False") + + if keep_nodes is False and keep_ways is False and keep_relations is False: + raise ValueError("At least on of the following parameters should be True: " + "'keep_nodes', 'keep_ways', or 'keep_relations'") \ No newline at end of file From 15bf0efb8d3aed32e1c93ed534602138a407e870 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 16:40:51 +0100 Subject: [PATCH 13/27] Ensure ways are not processed if None is passed --- pyrosm/data_manager.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyrosm/data_manager.pyx b/pyrosm/data_manager.pyx index 23e4885..809cbc3 100644 --- a/pyrosm/data_manager.pyx +++ b/pyrosm/data_manager.pyx @@ -99,6 +99,10 @@ cdef get_osm_ways_and_relations(way_records, relations, osm_keys, tags_as_column # Tags that should always be kept tags_as_columns += ["id", "nodes", "timestamp", "version"] + # If any way records weren't passed in, cannot parse anything + if way_records is None: + return None, None, None + # Get relations for specified OSM keys (one or multiple) if relations is not None: filtered_relations = get_relation_arrays(relations, osm_keys, data_filter) From 1b2e8f39f67341c0ad0e2c8e7126818038aa0b56 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 16:41:19 +0100 Subject: [PATCH 14/27] Add function for user_defined query --- pyrosm/user_defined.py | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 pyrosm/user_defined.py diff --git a/pyrosm/user_defined.py b/pyrosm/user_defined.py new file 
mode 100644 index 0000000..553fde1 --- /dev/null +++ b/pyrosm/user_defined.py @@ -0,0 +1,59 @@ +from pyrosm.data_manager import get_osm_data +from pyrosm.frames import prepare_geodataframe +import geopandas as gpd +import warnings + + +def get_user_defined_data(nodes, + node_coordinates, + way_records, + relations, + tags_as_columns, + custom_filter, + osm_keys, + filter_type, + keep_nodes, + keep_ways, + keep_relations): + + if not keep_nodes: + nodes = None + + # If wanting to parse relations but not ways, + # it is still necessary to parse ways as well at this point + if keep_ways is False and keep_relations is True: + pass + # If ways are not wanted, neither should relations be parsed + elif not keep_ways: + way_records = None + relations = None + + if not keep_relations: + relations = None + + # Call signature for fetching POIs + nodes, ways, relation_ways, relations = get_osm_data(node_arrays=nodes, + way_records=way_records, + relations=relations, + tags_as_columns=tags_as_columns, + data_filter=custom_filter, + filter_type=filter_type, + osm_keys=osm_keys, + ) + + # If there weren't any data, return empty GeoDataFrame + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any OSM data for given area.", + UserWarning, + stacklevel=2) + return gpd.GeoDataFrame() + + # Ensure that ways are None if returning those are not requested + if not keep_ways: + ways = None + + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + + return gdf From 8422828b9894b3c60a0de5362a89a34605d78681 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 16:41:27 +0100 Subject: [PATCH 15/27] Add function for user_defined query --- pyrosm/pyrosm.py | 105 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/pyrosm/pyrosm.py b/pyrosm/pyrosm.py index 4bc7bc6..7efbe09 100644 --- a/pyrosm/pyrosm.py +++ 
b/pyrosm/pyrosm.py @@ -3,6 +3,8 @@ from pyrosm._arrays import concatenate_dicts_of_arrays from pyrosm.geometry import create_node_coordinates_lookup from pyrosm.frames import create_nodes_gdf +from pyrosm.utils import validate_custom_filter, validate_osm_keys, \ + validate_tags_as_columns, validate_booleans from shapely.geometry import Polygon, MultiPolygon from pyrosm.buildings import get_building_data @@ -10,6 +12,7 @@ from pyrosm.natural import get_natural_data from pyrosm.networks import get_network_data from pyrosm.pois import get_poi_data +from pyrosm.user_defined import get_user_defined_data class OSM: @@ -217,7 +220,6 @@ def get_landuse(self, custom_filter=None): gdf = gdf.drop("nodes", axis=1) return gdf - def get_natural(self, custom_filter=None): """ Parses natural from OSM. @@ -365,6 +367,107 @@ def get_pois(self, custom_filter=None): gdf = gdf.drop("nodes", axis=1) return gdf + def get_osm_by_custom_criteria(self, + custom_filter, + osm_keys_to_keep=None, + filter_type="keep", + tags_as_columns=None, + keep_nodes=True, + keep_ways=True, + keep_relations=True): + """ + Parse OSM data based on custom criteria. + + Parameters + ---------- + + custom_filter : dict (required) + A custom filter (OSM keys mapped to True or to a list of tag values) for selecting elements from OpenStreetMap. + + osm_keys_to_keep : str | list + A filter to specify which OSM keys should be kept. + + filter_type : str + "keep" | "exclude" + Whether the filters should be used to keep or exclude the data from OSM. + + tags_as_columns : list + Which tags should be kept as columns in the resulting GeoDataFrame. + + keep_nodes : bool + Whether or not the nodes should be kept in the resulting GeoDataFrame if they are found. + + keep_ways : bool + Whether or not the ways should be kept in the resulting GeoDataFrame if they are found. + + keep_relations : bool + Whether or not the relations should be kept in the resulting GeoDataFrame if they are found.
+ + """ + + # Check that the custom filter is in correct format + validate_custom_filter(custom_filter) + + if not isinstance(filter_type, str): + raise ValueError("'filter_type' -parameter should be either 'keep' or 'exclude'. ") + + # Validate osm keys + validate_osm_keys(osm_keys_to_keep) + if isinstance(osm_keys_to_keep, str): + osm_keys_to_keep = [osm_keys_to_keep] + + # Validate filter + filter_type = filter_type.lower() + if filter_type not in ["keep", "exclude"]: + raise ValueError("'filter_type' -parameter should be either 'keep' or 'exclude'. ") + + # Tags to keep as columns + if tags_as_columns is None: + tags_as_columns = [] + for k in custom_filter.keys(): + try: + tags_as_columns += getattr(self.conf.tags, k) + except Exception as e: + pass + # If tags weren't available in conf, store keys as columns by default + # (all other tags in such cases will be stored in 'tags' column as JSON) + if len(tags_as_columns) == 0: + tags_as_columns = list(custom_filter.keys()) + + else: + # Validate tags + validate_tags_as_columns(tags_as_columns) + + # Validate booleans + validate_booleans(keep_nodes, keep_ways, keep_relations) + + if self._nodes is None or self._way_records is None: + self._read_pbf() + + # If nodes are still in chunks, merge before passing forward + if isinstance(self._nodes, list): + self._nodes = concatenate_dicts_of_arrays(self._nodes) + + gdf = get_user_defined_data(self._nodes, + self._node_coordinates, + self._way_records, + self._relations, + tags_as_columns, + custom_filter, + osm_keys_to_keep, + filter_type, + keep_nodes, + keep_ways, + keep_relations + ) + + # Do not keep node information unless specifically asked for + # (they are in a list, and can cause issues when saving the files) + if not self.keep_node_info: + if "nodes" in gdf.columns: + gdf = gdf.drop("nodes", axis=1) + return gdf + def __getattribute__(self, name): # If node-gdf is requested convert to gdf before returning if name == "_nodes_gdf": From 
7ac80989a9d8c615aaf8e7a4557da8358ad9f300 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 20:29:45 +0100 Subject: [PATCH 16/27] Add Cython cleaning helper for Windows --- make.bat | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 make.bat diff --git a/make.bat b/make.bat new file mode 100644 index 0000000..088e47c --- /dev/null +++ b/make.bat @@ -0,0 +1,34 @@ +@echo off + +REM Cython building utility commands for Windows + +REM Clean all C-files, pyd-files, pyrobuf-directory, build-directory, and egg-info +if "%1" == "clean" ( + IF EXIST *.pyd ( + del /S *.pyd + ) + + IF EXIST *.c ( + del /S *.c + ) + + IF EXIST .coverage ( + del /S .coverage + ) + + IF EXIST pyrosm.egg-info ( + RMDIR /S /Q pyrosm.egg-info + ) + + IF EXIST pyrobuf ( + RMDIR /S /Q pyrobuf + ) + + IF EXIST build ( + RMDIR /S /Q build + ) + + IF EXIST .pytest_cache ( + RMDIR /S /Q .pytest_cache + ) +) From af7b77e37b9eb1d1bc908a9c15c56159ec1989f7 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 20:34:07 +0100 Subject: [PATCH 17/27] Add Cython cleaning helper for Windows --- make.bat | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/make.bat b/make.bat index 088e47c..2f06f2c 100644 --- a/make.bat +++ b/make.bat @@ -7,10 +7,8 @@ if "%1" == "clean" ( IF EXIST *.pyd ( del /S *.pyd ) - - IF EXIST *.c ( - del /S *.c - ) + REM For Some reason C-files are not detected automatically with if-exist + del /S *.c IF EXIST .coverage ( del /S .coverage From 737cb488692d8dc9a0611e9210c43103813deaf6 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:04:30 +0100 Subject: [PATCH 18/27] Improve test coverage --- tests/test_building_parsing.py | 23 ++++++++++ tests/test_custom_filter.py | 34 +++++++++++++++ tests/test_data.py | 38 +++++++++++++++++ tests/test_landuse_parsing.py | 49 ++++++++++++++++++++++ tests/test_main.py | 77 ++++++++++++++++++++++++++++++++++ 
tests/test_natural_parsing.py | 40 ++++++++++++++++++ tests/test_network_parsing.py | 69 ++++++++++++++++++++++++++++++ tests/test_poi_parsing.py | 34 +++++++++++++++ 8 files changed, 364 insertions(+) create mode 100644 tests/test_data.py create mode 100644 tests/test_main.py diff --git a/tests/test_building_parsing.py b/tests/test_building_parsing.py index 5b179c3..95ba038 100644 --- a/tests/test_building_parsing.py +++ b/tests/test_building_parsing.py @@ -204,3 +204,26 @@ def test_reading_buildings_from_area_having_none(helsinki_pbf): # Result should be empty GeoDataFrame assert isinstance(gdf, GeoDataFrame) assert gdf.shape == (0, 0) + + +def test_passing_incorrect_custom_filter(test_pbf): + from pyrosm import OSM + + osm = OSM(filepath=test_pbf) + try: + osm.get_buildings(custom_filter="wrong") + except ValueError as e: + if "dictionary" in str(e): + pass + except Exception as e: + raise e + + +def test_passing_custom_filter_without_element_key(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + osm = OSM(filepath=test_pbf) + gdf = osm.get_buildings(custom_filter={"start_date": True}) + assert isinstance(gdf, GeoDataFrame) + diff --git a/tests/test_custom_filter.py b/tests/test_custom_filter.py index 674eb7d..43fe8ac 100644 --- a/tests/test_custom_filter.py +++ b/tests/test_custom_filter.py @@ -231,6 +231,19 @@ def test_using_incorrect_booleans(test_pbf): raise e + try: + gdf = osm.get_osm_by_custom_criteria(custom_filter=custom_filter, + keep_relations=False, + keep_ways=False, + keep_nodes=False + ) + except ValueError as e: + if "At least on of the following parameters should be True" in str(e): + pass + else: + raise e + + def test_using_incorrect_osm_keys(test_pbf): from pyrosm import OSM osm = OSM(filepath=test_pbf) @@ -389,3 +402,24 @@ def test_custom_filters_with_custom_keys(helsinki_pbf): assert isinstance(transit, GeoDataFrame) assert len(transit) == 374 + + +def test_reading_custom_from_area_having_none(helsinki_pbf): + from 
pyrosm import OSM + from geopandas import GeoDataFrame + + # Bounding box for area that does not have any data + bbox = [24.940514, 60.173849, 24.942, 60.175892] + + osm = OSM(filepath=helsinki_pbf, bounding_box=bbox) + + # The tool should warn if no buildings were found + with pytest.warns(UserWarning) as w: + gdf = osm.get_osm_by_custom_criteria({"highway": ["primary"]}) + # Check the warning text + if "could not find any OSM data" in str(w): + pass + + # Result should be empty GeoDataFrame + assert isinstance(gdf, GeoDataFrame) + assert gdf.shape == (0, 0) \ No newline at end of file diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 0000000..cb2b3be --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,38 @@ +import pytest +from pyrosm import get_path + + +@pytest.fixture +def test_pbf(): + pbf_path = get_path("test_pbf") + return pbf_path + + +@pytest.fixture +def helsinki_pbf(): + pbf_path = get_path("helsinki_pbf") + return pbf_path + + +def test_available(): + import pyrosm + assert isinstance(pyrosm.data.available, list) + + +def test_not_available(): + try: + get_path("file_not_existing") + except ValueError as e: + if "is not available" in str(e): + pass + else: + raise e + except Exception as e: + raise e + + +def test_temp_dir(): + import pyrosm + import os + assert os.path.isdir(os.path.dirname( + pyrosm.data._temp_path)) diff --git a/tests/test_landuse_parsing.py b/tests/test_landuse_parsing.py index f5b4515..f141909 100644 --- a/tests/test_landuse_parsing.py +++ b/tests/test_landuse_parsing.py @@ -8,6 +8,12 @@ def test_pbf(): return pbf_path +@pytest.fixture +def helsinki_pbf(): + pbf_path = get_path("helsinki_pbf") + return pbf_path + + def test_parsing_landuse_with_defaults(test_pbf): from pyrosm import OSM from pyrosm.landuse import get_landuse_data @@ -36,3 +42,46 @@ def test_parsing_landuse_with_defaults(test_pbf): # Test shape assert len(gdf) == 50 assert gdf.crs == pyproj.CRS.from_epsg(4326) + + +def 
test_reading_landuse_from_area_having_none(helsinki_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + # Bounding box for area that does not have any data + bbox = [24.947241, 60.174997, 24.948240, 60.175716] + + osm = OSM(filepath=helsinki_pbf, bounding_box=bbox) + + # The tool should warn if no buildings were found + with pytest.warns(UserWarning) as w: + gdf = osm.get_landuse() + # Check the warning text + if "could not find any buildings" in str(w): + pass + + # Result should be empty GeoDataFrame + assert isinstance(gdf, GeoDataFrame) + assert gdf.shape == (0, 0) + + +def test_passing_incorrect_custom_filter(test_pbf): + from pyrosm import OSM + + osm = OSM(filepath=test_pbf) + try: + osm.get_landuse(custom_filter="wrong") + except ValueError as e: + if "dictionary" in str(e): + pass + except Exception as e: + raise e + + +def test_passing_custom_filter_without_element_key(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + osm = OSM(filepath=test_pbf) + gdf = osm.get_landuse(custom_filter={"leisure": True}) + assert isinstance(gdf, GeoDataFrame) \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..fbbbcbc --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,77 @@ +import pytest +from pyrosm import get_path + + +@pytest.fixture +def test_pbf(): + pbf_path = get_path("test_pbf") + return pbf_path + + +def test_network(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + osm = OSM(test_pbf) + gdf = osm.get_network() + assert isinstance(gdf, GeoDataFrame) + + +def test_buildings(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + osm = OSM(test_pbf) + gdf = osm.get_buildings() + assert isinstance(gdf, GeoDataFrame) + + +def test_landuse(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + osm = OSM(test_pbf) + gdf = osm.get_landuse() + assert isinstance(gdf, GeoDataFrame) + + +def 
test_pois(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + osm = OSM(test_pbf) + gdf = osm.get_pois() + assert isinstance(gdf, GeoDataFrame) + + +def test_natural(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + osm = OSM(test_pbf) + gdf = osm.get_natural() + assert isinstance(gdf, GeoDataFrame) + + +def test_custom(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + osm = OSM(test_pbf) + gdf = osm.get_osm_by_custom_criteria({"highway": ["secondary"]}) + assert isinstance(gdf, GeoDataFrame) + + +def test_passing_incorrect_filepath(): + from pyrosm import OSM + try: + OSM(11) + except ValueError: + pass + except Exception as e: + raise e + + +def test_passing_wrong_file_format(): + from pyrosm import OSM + try: + OSM("test.osm") + except ValueError: + pass + except Exception as e: + raise e + diff --git a/tests/test_natural_parsing.py b/tests/test_natural_parsing.py index c1ab5d4..910034a 100644 --- a/tests/test_natural_parsing.py +++ b/tests/test_natural_parsing.py @@ -8,6 +8,12 @@ def test_pbf(): return pbf_path +@pytest.fixture +def helsinki_pbf(): + pbf_path = get_path("helsinki_pbf") + return pbf_path + + def test_parsing_natural_with_defaults(test_pbf): from pyrosm import OSM from pyrosm.natural import get_natural_data @@ -36,3 +42,37 @@ def test_parsing_natural_with_defaults(test_pbf): # Test shape assert len(gdf) == 14 assert gdf.crs == pyproj.CRS.from_epsg(4326) + + +def test_reading_natural_from_area_having_none(helsinki_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + # Bounding box for area that does not have any data + bbox = [24.939753, 60.173388, 24.941269,60.174829] + + osm = OSM(filepath=helsinki_pbf, bounding_box=bbox) + + # The tool should warn if no buildings were found + with pytest.warns(UserWarning) as w: + gdf = osm.get_natural() + # Check the warning text + if "could not find any buildings" in str(w): + pass + + # Result should be empty 
GeoDataFrame + assert isinstance(gdf, GeoDataFrame) + assert gdf.shape == (0, 0) + + +def test_passing_incorrect_custom_filter(test_pbf): + from pyrosm import OSM + + osm = OSM(filepath=test_pbf) + try: + osm.get_natural(custom_filter="wrong") + except ValueError as e: + if "dictionary" in str(e): + pass + except Exception as e: + raise e \ No newline at end of file diff --git a/tests/test_network_parsing.py b/tests/test_network_parsing.py index c40e104..4f52fba 100644 --- a/tests/test_network_parsing.py +++ b/tests/test_network_parsing.py @@ -67,6 +67,30 @@ def test_filter_network_by_driving(test_pbf): assert "path" not in gdf["highway"].unique() +def test_filter_network_by_driving_with_service_roads(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + from shapely.geometry import LineString + osm = OSM(filepath=test_pbf) + gdf = osm.get_network(network_type="driving+service") + + assert isinstance(gdf.loc[0, 'geometry'], LineString) + assert isinstance(gdf, GeoDataFrame) + + # Test shape + assert gdf.shape == (200, 18) + + required_cols = ['access', 'bridge', 'highway', 'int_ref', 'lanes', 'lit', 'maxspeed', + 'name', 'oneway', 'ref', 'service', 'surface', 'id', 'geometry', 'tags', + 'osm_type'] + for col in required_cols: + assert col in gdf.columns + + # Should not include 'footway' or 'path' ways by default + assert "footway" not in gdf["highway"].unique() + assert "path" not in gdf["highway"].unique() + + def test_filter_network_by_cycling(test_pbf): from pyrosm import OSM from geopandas import GeoDataFrame @@ -91,6 +115,26 @@ def test_filter_network_by_cycling(test_pbf): assert "motorway_link" not in gdf["highway"].unique() +def test_filter_network_by_all(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + from shapely.geometry import LineString + osm = OSM(filepath=test_pbf) + gdf = osm.get_network(network_type="all") + + assert isinstance(gdf.loc[0, 'geometry'], LineString) + assert isinstance(gdf, GeoDataFrame) 
+ + # Test shape + assert gdf.shape == (331, 21) + + required_cols = ['access', 'bicycle', 'bridge', 'foot', 'highway', 'lanes', 'lit', + 'maxspeed', 'name', 'oneway', 'ref', 'service', 'surface', 'tunnel', + 'id', 'geometry', 'tags', 'osm_type'] + for col in required_cols: + assert col in gdf.columns + + def test_saving_network_to_shapefile(test_pbf, test_output_dir): import os from pyrosm import OSM @@ -195,6 +239,31 @@ def test_passing_incorrect_bounding_box(test_pbf): raise e +def test_passing_incorrect_net_type(test_pbf): + from pyrosm import OSM + + osm = OSM(filepath=test_pbf) + try: + osm.get_network("wrong_network") + except ValueError as e: + if "'network_type' should be one of the following" in str(e): + pass + else: + raise(e) + except Exception as e: + raise e + + try: + osm.get_network(42) + except ValueError as e: + if "'network_type' should be one of the following" in str(e): + pass + else: + raise(e) + except Exception as e: + raise e + + def test_reading_network_from_area_without_data(helsinki_pbf): from pyrosm import OSM from geopandas import GeoDataFrame diff --git a/tests/test_poi_parsing.py b/tests/test_poi_parsing.py index fc556b0..571a370 100644 --- a/tests/test_poi_parsing.py +++ b/tests/test_poi_parsing.py @@ -61,3 +61,37 @@ def test_parsing_pois_with_defaults(helsinki_pbf, default_filter): # Test shape assert len(gdf) == 1780 assert gdf.crs == pyproj.CRS.from_epsg(4326) + + +def test_reading_pois_from_area_having_none(helsinki_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + # Bounding box for area that does not have any data + bbox = [24.940514, 60.173849, 24.942, 60.175892] + + osm = OSM(filepath=helsinki_pbf, bounding_box=bbox) + + # The tool should warn if no buildings were found + with pytest.warns(UserWarning) as w: + gdf = osm.get_pois() + # Check the warning text + if "could not find any buildings" in str(w): + pass + + # Result should be empty GeoDataFrame + assert isinstance(gdf, GeoDataFrame) + assert 
gdf.shape == (0, 0) + + +def test_passing_incorrect_custom_filter(test_pbf): + from pyrosm import OSM + + osm = OSM(filepath=test_pbf) + try: + osm.get_pois(custom_filter="wrong") + except ValueError as e: + if "dictionary" in str(e): + pass + except Exception as e: + raise e \ No newline at end of file From 6dd0e597374b14e1325090f827b0cea66fff1f33 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:07:40 +0100 Subject: [PATCH 19/27] Update docs --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1a503e8..ba59a51 100644 --- a/README.md +++ b/README.md @@ -28,15 +28,16 @@ which is also used by OpenStreetMap contributors to distribute the OSM data in P - read Points of Interest (POI) from PBF - read landuse from PBF - read "natural" from PBF + - read any other data from PBF by using a custom user-defined filter - filter data based on bounding box - - apply custom criteria with buildings and POIs to filter the data - - e.g. keeping only specific type of buildings can be done by applying a filter: `{'building': ['residential', 'retail']}` ## Roadmap + - improve docs and make simple website + - run benchmarks against other tools - add possibility to crop PBF and save a subset into new PBF. - - add more tests + - automate PBF downloading from Geofabrik (?) 
## Install From 290b6da8683a3b439746c331ea449dd7f209a2dd Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:08:18 +0100 Subject: [PATCH 20/27] Remove (currently) obsolete code block --- pyrosm/data/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyrosm/data/__init__.py b/pyrosm/data/__init__.py index 4dfc3e6..adfd8d3 100644 --- a/pyrosm/data/__init__.py +++ b/pyrosm/data/__init__.py @@ -24,8 +24,6 @@ def get_path(dataset): """ if dataset in _package_files: return os.path.abspath(os.path.join(_module_path, _package_files[dataset])) - elif dataset in _temp_files: - return os.path.join(_temp_path, _temp_files[dataset]) else: msg = "The dataset '{data}' is not available. ".format(data=dataset) msg += "Available datasets are {}".format(", ".join(available)) From d1eb82603ddccb281f8986c94a7cf6386b84395b Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:08:53 +0100 Subject: [PATCH 21/27] Use validation from utils --- pyrosm/buildings.py | 5 ++--- pyrosm/landuse.py | 5 ++--- pyrosm/natural.py | 5 ++--- pyrosm/pois.py | 3 +++ 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pyrosm/buildings.py b/pyrosm/buildings.py index 696af91..f569223 100644 --- a/pyrosm/buildings.py +++ b/pyrosm/buildings.py @@ -1,5 +1,6 @@ from pyrosm.data_manager import get_osm_data from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter import geopandas as gpd import warnings @@ -10,9 +11,7 @@ def get_building_data(node_coordinates, way_records, relations, tags_as_columns, custom_filter = {"building": True} else: # Check that the custom filter is in correct format - if not isinstance(custom_filter, dict): - raise ValueError(f"'custom_filter' should be a Python dictionary. 
" - f"Got {custom_filter} with type {type(custom_filter)}.") + validate_custom_filter(custom_filter) # Ensure that the "building" tag exists if "building" not in custom_filter.keys(): diff --git a/pyrosm/landuse.py b/pyrosm/landuse.py index 46083c2..5693467 100644 --- a/pyrosm/landuse.py +++ b/pyrosm/landuse.py @@ -1,5 +1,6 @@ from pyrosm.data_manager import get_osm_data from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter import geopandas as gpd import warnings @@ -10,9 +11,7 @@ def get_landuse_data(nodes, node_coordinates, way_records, relations, tags_as_co custom_filter = {"landuse": True} else: # Check that the custom filter is in correct format - if not isinstance(custom_filter, dict): - raise ValueError(f"'custom_filter' should be a Python dictionary. " - f"Got {custom_filter} with type {type(custom_filter)}.") + validate_custom_filter(custom_filter) # Ensure that the "landuse" tag exists if "landuse" not in custom_filter.keys(): diff --git a/pyrosm/natural.py b/pyrosm/natural.py index 925279b..74cbbdc 100644 --- a/pyrosm/natural.py +++ b/pyrosm/natural.py @@ -1,5 +1,6 @@ from pyrosm.data_manager import get_osm_data from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter import geopandas as gpd import warnings @@ -10,9 +11,7 @@ def get_natural_data(nodes, node_coordinates, way_records, relations, tags_as_co custom_filter = {"natural": True} else: # Check that the custom filter is in correct format - if not isinstance(custom_filter, dict): - raise ValueError(f"'custom_filter' should be a Python dictionary. 
" - f"Got {custom_filter} with type {type(custom_filter)}.") + validate_custom_filter(custom_filter) # Ensure that the "landuse" tag exists if "natural" not in custom_filter.keys(): diff --git a/pyrosm/pois.py b/pyrosm/pois.py index 9a47654..60d82f1 100644 --- a/pyrosm/pois.py +++ b/pyrosm/pois.py @@ -1,10 +1,13 @@ from pyrosm.data_manager import get_osm_data from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter import geopandas as gpd import warnings def get_poi_data(nodes, node_coordinates, way_records, relations, tags_as_columns, custom_filter): + # Validate filter + validate_custom_filter(custom_filter) # Call signature for fetching POIs nodes, ways, relation_ways, relations = get_osm_data(node_arrays=nodes, From 87ca4bb5b65a5ec7d6b8ca428073b328005235ab Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:09:31 +0100 Subject: [PATCH 22/27] Fix case where data_filter is None --- pyrosm/data_filter.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyrosm/data_filter.pyx b/pyrosm/data_filter.pyx index 026eb14..711c69b 100644 --- a/pyrosm/data_filter.pyx +++ b/pyrosm/data_filter.pyx @@ -93,8 +93,9 @@ cdef filter_osm_records(data_records, if not isinstance(osm_data_type, list): osm_data_type = [osm_data_type] - if len(data_filter) == 0: - data_filter = None + if data_filter is not None: + if len(data_filter) == 0: + data_filter = None if data_filter is not None: filter_keys = list(data_filter.keys()) From 0dd6584a61fca59b45b411b592560ed721202d41 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:10:03 +0100 Subject: [PATCH 23/27] Ensure relation records are not returned if no ways were found --- pyrosm/data_manager.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyrosm/data_manager.pyx b/pyrosm/data_manager.pyx index 809cbc3..699ed4e 100644 --- a/pyrosm/data_manager.pyx +++ b/pyrosm/data_manager.pyx @@ -85,8 +85,9 @@ cdef 
get_way_arrays(way_records, relation_way_ids, osm_keys, tags_as_columns, da if relation_way_ids is not None: # Separate ways that are part of a relation ways, relation_ways = separate_relation_ways(ways, relation_way_ids) - relation_ways = convert_way_records_to_lists(relation_ways, tags_as_columns) - relation_arrays = convert_to_arrays_and_drop_empty(relation_ways) + if len(relation_ways) > 0: + relation_ways = convert_way_records_to_lists(relation_ways, tags_as_columns) + relation_arrays = convert_to_arrays_and_drop_empty(relation_ways) # Process separated ways ways = convert_way_records_to_lists(ways, tags_as_columns) @@ -126,6 +127,9 @@ cdef get_osm_ways_and_relations(way_records, relations, osm_keys, tags_as_column tags_as_columns, data_filter, filter_type) + # If relation ways could not be parsed, also relations should be returned as None + if relation_ways is None: + filtered_relations = None # If there weren't any ways return None if ways is None: From 5f7af598bd3b789f921c4a77476cdadd9bb01e4c Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:10:36 +0100 Subject: [PATCH 24/27] Add possibility to filter driving with public service roads included. 
--- pyrosm/pyrosm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyrosm/pyrosm.py b/pyrosm/pyrosm.py index 7efbe09..8b83bd3 100644 --- a/pyrosm/pyrosm.py +++ b/pyrosm/pyrosm.py @@ -78,9 +78,9 @@ def _read_pbf(self): def _get_network_filter(self, net_type): possible_filters = [a for a in self.conf.network_filters.__dir__() if "__" not in a] - possible_filters += ["all"] + possible_filters += ["all", "driving+service"] possible_values = ", ".join(possible_filters) - msg = "'net_type' should be one of the following: " + possible_values + msg = "'network_type' should be one of the following: " + possible_values if not isinstance(net_type, str): raise ValueError(msg) @@ -94,6 +94,8 @@ def _get_network_filter(self, net_type): return self.conf.network_filters.walking elif net_type == "driving": return self.conf.network_filters.driving + elif net_type == "driving+service": + return self.conf.network_filters.driving_psv elif net_type == "cycling": return self.conf.network_filters.cycling elif net_type == "all": From 5085f6429c74610373ac12c84161e518ed3df8a2 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:16:21 +0100 Subject: [PATCH 25/27] Update changes --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bfde38f..ca23038 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,23 @@ Changelog ========= +v0.4.0 +------ + +- read PBF using custom queries (allows anything to be fetched) +- read landuse from PBF +- read natural from PBF +- improve geometry parsing so that geometry type is read automatically according OSM rules +- modularize code-base +- improve test coverage + + +v0.3.2 +------ + +- generalize code base +- read Points of Interest (POI) from PBF + v0.2.0 ------ From e568699f5a72640952e3d319db1f5644b787e077 Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:19:00 +0100 Subject: [PATCH 26/27] Update changes --- 
CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca23038..8dcfcc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ v0.4.0 - improve test coverage -v0.3.2 +v0.3.1 ------ - generalize code base From 26c8e6221af31fafb6b73610ceaab5b6c5f03f1a Mon Sep 17 00:00:00 2001 From: Henrikki Tenkanen Date: Thu, 16 Apr 2020 21:19:57 +0100 Subject: [PATCH 27/27] Pump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b0acf98..df785bb 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ def read_long_description(): setup( name='pyrosm', - version='0.3.2', + version='0.4.0', license='MIT', description='A Python tool to parse OSM data from Protobuf format into GeoDataFrame.', long_description=read_long_description(),