diff --git a/CHANGELOG.md b/CHANGELOG.md index bfde38f..8dcfcc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,23 @@ Changelog ========= +v0.4.0 +------ + +- read PBF using custom queries (allows anything to be fetched) +- read landuse from PBF +- read natural from PBF +- improve geometry parsing so that geometry type is read automatically according to OSM rules +- modularize code-base +- improve test coverage + + +v0.3.1 +------ + +- generalize code base +- read Points of Interest (POI) from PBF + v0.2.0 ------ diff --git a/README.md b/README.md index fcd5ef2..ba59a51 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,12 @@ [![PyPI version](https://badge.fury.io/py/pyrosm.svg)](https://badge.fury.io/py/pyrosm)[![build status](https://api.travis-ci.org/HTenkanen/pyrosm.svg?branch=master)](https://travis-ci.org/HTenkanen/pyrosm)[![Coverage Status](https://codecov.io/gh/HTenkanen/pyrosm/branch/master/graph/badge.svg)](https://codecov.io/gh/HTenkanen/pyrosm) **Pyrosm** is a Python library for reading OpenStreetMap from `protobuf` files (`*.osm.pbf`) into Geopandas GeoDataFrames. -Pyrosm makes it easy to extract various datasets from OpenStreetMap pbf-dumps including e.g. road networks and buildings (points of interest in progress). +Pyrosm makes it easy to extract various datasets from OpenStreetMap pbf-dumps including e.g. road networks, buildings, +Points of Interest (POI), landuse and natural elements. Fully customized queries are also supported, which makes it possible +to parse the data from OSM with more specific filters. -**Pyrosm** is easy to use and it provides a somewhat similar user interface as another popular Python library [OSMnx](https://github.com/gboeing/osmnx) -for parsing different datasets from the OpenStreetMap pbf-dump including road networks, buildings and Points of Interest (later also landuse and possibility to make customized calls). 
+**Pyrosm** is easy to use and it provides a user interface somewhat similar to that of [OSMnx](https://github.com/gboeing/osmnx). The main difference between pyrosm and OSMnx is that OSMnx reads the data over internet using OverPass API, whereas pyrosm reads the data from local OSM data dumps that can be downloaded e.g. from [GeoFabrik's website](http://download.geofabrik.de/). This makes it possible to read data much faster thus allowing e.g. parsing street networks for whole country in a matter of minutes instead of hours (however, see [caveats](#caveats)). @@ -25,16 +26,18 @@ which is also used by OpenStreetMap contributors to distribute the OSM data in P - read street networks (separately for driving, cycling, walking and all-combined) - read buildings from PBF - read Points of Interest (POI) from PBF + - read landuse from PBF + - read "natural" from PBF + - read any other data from PBF by using a custom user-defined filter - filter data based on bounding box - - apply custom filter to filter data - - e.g. keeping only specific type of buildings can be done by applying a filter: `{'building': ['residential', 'retail']}` ## Roadmap - - add parsing of landuse + - improve docs and make a simple website + - run benchmarks against other tools - add possibility to crop PBF and save a subset into new PBF. - - add more tests + - automate PBF downloading from Geofabrik (?) 
## Install diff --git a/make.bat b/make.bat new file mode 100644 index 0000000..2f06f2c --- /dev/null +++ b/make.bat @@ -0,0 +1,32 @@ +@echo off + +REM Cython building utility commands for Windows + +REM Clean all C-files, pyd-files, pyrobuf-directory, build-directory, and egg-info +if "%1" == "clean" ( + IF EXIST *.pyd ( + del /S *.pyd + ) + REM For Some reason C-files are not detected automatically with if-exist + del /S *.c + + IF EXIST .coverage ( + del /S .coverage + ) + + IF EXIST pyrosm.egg-info ( + RMDIR /S /Q pyrosm.egg-info + ) + + IF EXIST pyrobuf ( + RMDIR /S /Q pyrobuf + ) + + IF EXIST build ( + RMDIR /S /Q build + ) + + IF EXIST .pytest_cache ( + RMDIR /S /Q .pytest_cache + ) +) diff --git a/pyrosm/buildings.py b/pyrosm/buildings.py index 9a3a3da..f569223 100644 --- a/pyrosm/buildings.py +++ b/pyrosm/buildings.py @@ -1,7 +1,6 @@ from pyrosm.data_manager import get_osm_data -from pyrosm.geometry import create_polygon_geometries -from pyrosm.frames import create_gdf -from pyrosm.relations import prepare_relations +from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter import geopandas as gpd import warnings @@ -12,9 +11,7 @@ def get_building_data(node_coordinates, way_records, relations, tags_as_columns, custom_filter = {"building": True} else: # Check that the custom filter is in correct format - if not isinstance(custom_filter, dict): - raise ValueError(f"'custom_filter' should be a Python dictionary. 
" - f"Got {custom_filter} with type {type(custom_filter)}.") + validate_custom_filter(custom_filter) # Ensure that the "building" tag exists if "building" not in custom_filter.keys(): @@ -31,32 +28,13 @@ def get_building_data(node_coordinates, way_records, relations, tags_as_columns, ) # If there weren't any data, return empty GeoDataFrame - if ways is None: - warnings.warn("Could not find any buildings for given area.", + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any landuse elements for given area.", UserWarning, stacklevel=2) return gpd.GeoDataFrame() - # Create geometries for normal ways - geometries = create_polygon_geometries(node_coordinates, - ways) - - # Convert to GeoDataFrame - way_gdf = create_gdf(ways, geometries) - way_gdf["osm_type"] = "way" - - # Prepare relation data if it is available - if relations is not None: - relations = prepare_relations(relations, relation_ways, - node_coordinates, - tags_as_columns) - relation_gdf = gpd.GeoDataFrame(relations) - relation_gdf["osm_type"] = "relation" - - gdf = way_gdf.append(relation_gdf, ignore_index=True) - else: - gdf = way_gdf - - gdf = gdf.dropna(subset=['geometry']).reset_index(drop=True) - + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) return gdf diff --git a/pyrosm/config/default_tags.py b/pyrosm/config/default_tags.py index 2b37e98..ad36197 100644 --- a/pyrosm/config/default_tags.py +++ b/pyrosm/config/default_tags.py @@ -856,7 +856,8 @@ # PUBLIC_TRANSPORT TAGS # ======================== # See: https://wiki.openstreetmap.org/wiki/Key%3Apublic_transport -public_transport_columns = ["stop_position", +public_transport_columns = basic_info_tags + \ + ["stop_position", "platform", "station", "stop_area", diff --git a/pyrosm/data/__init__.py b/pyrosm/data/__init__.py index 4dfc3e6..adfd8d3 100644 --- a/pyrosm/data/__init__.py +++ b/pyrosm/data/__init__.py @@ -24,8 +24,6 @@ def 
get_path(dataset): """ if dataset in _package_files: return os.path.abspath(os.path.join(_module_path, _package_files[dataset])) - elif dataset in _temp_files: - return os.path.join(_temp_path, _temp_files[dataset]) else: msg = "The dataset '{data}' is not available. ".format(data=dataset) msg += "Available datasets are {}".format(", ".join(available)) diff --git a/pyrosm/data_filter.pyx b/pyrosm/data_filter.pyx index 026eb14..711c69b 100644 --- a/pyrosm/data_filter.pyx +++ b/pyrosm/data_filter.pyx @@ -93,8 +93,9 @@ cdef filter_osm_records(data_records, if not isinstance(osm_data_type, list): osm_data_type = [osm_data_type] - if len(data_filter) == 0: - data_filter = None + if data_filter is not None: + if len(data_filter) == 0: + data_filter = None if data_filter is not None: filter_keys = list(data_filter.keys()) diff --git a/pyrosm/data_manager.pyx b/pyrosm/data_manager.pyx index 23e4885..699ed4e 100644 --- a/pyrosm/data_manager.pyx +++ b/pyrosm/data_manager.pyx @@ -85,8 +85,9 @@ cdef get_way_arrays(way_records, relation_way_ids, osm_keys, tags_as_columns, da if relation_way_ids is not None: # Separate ways that are part of a relation ways, relation_ways = separate_relation_ways(ways, relation_way_ids) - relation_ways = convert_way_records_to_lists(relation_ways, tags_as_columns) - relation_arrays = convert_to_arrays_and_drop_empty(relation_ways) + if len(relation_ways) > 0: + relation_ways = convert_way_records_to_lists(relation_ways, tags_as_columns) + relation_arrays = convert_to_arrays_and_drop_empty(relation_ways) # Process separated ways ways = convert_way_records_to_lists(ways, tags_as_columns) @@ -99,6 +100,10 @@ cdef get_osm_ways_and_relations(way_records, relations, osm_keys, tags_as_column # Tags that should always be kept tags_as_columns += ["id", "nodes", "timestamp", "version"] + # If any way records weren't passed in, cannot parse anything + if way_records is None: + return None, None, None + # Get relations for specified OSM keys (one or 
multiple) if relations is not None: filtered_relations = get_relation_arrays(relations, osm_keys, data_filter) @@ -122,6 +127,9 @@ cdef get_osm_ways_and_relations(way_records, relations, osm_keys, tags_as_column tags_as_columns, data_filter, filter_type) + # If relation ways could not be parsed, also relations should be returned as None + if relation_ways is None: + filtered_relations = None # If there weren't any ways return None if ways is None: diff --git a/pyrosm/frames.pxd b/pyrosm/frames.pxd index 46d6df4..6d641c1 100644 --- a/pyrosm/frames.pxd +++ b/pyrosm/frames.pxd @@ -1,2 +1,6 @@ cpdef create_nodes_gdf(node_dict_list) -cpdef create_gdf(data_records, geometry_array) \ No newline at end of file +cpdef create_gdf(data_records, geometry_array) +cpdef prepare_way_gdf(node_coordinates, ways) +cpdef prepare_node_gdf(nodes) +cpdef prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) \ No newline at end of file diff --git a/pyrosm/frames.pyx b/pyrosm/frames.pyx index 6346222..97e53a1 100644 --- a/pyrosm/frames.pyx +++ b/pyrosm/frames.pyx @@ -2,9 +2,11 @@ import pandas as pd import geopandas as gpd from pyrosm._arrays cimport concatenate_dicts_of_arrays from pyrosm.geometry cimport _create_point_geometries - +from pyrosm.geometry cimport create_way_geometries +from pyrosm.relations import prepare_relations cpdef create_nodes_gdf(nodes): + cdef str k if isinstance(nodes, list): nodes = concatenate_dicts_of_arrays(nodes) df = pd.DataFrame() @@ -13,10 +15,9 @@ cpdef create_nodes_gdf(nodes): df['geometry'] = _create_point_geometries(nodes['lon'], nodes['lat']) return gpd.GeoDataFrame(df, crs='epsg:4326') - cpdef create_gdf(data_arrays, geometry_array): + cdef str key df = pd.DataFrame() - for key, data in data_arrays.items(): # When inserting nodes, # those should be converted @@ -28,3 +29,53 @@ cpdef create_gdf(data_arrays, geometry_array): df['geometry'] = geometry_array return gpd.GeoDataFrame(df, crs='epsg:4326') + +cpdef 
prepare_way_gdf(node_coordinates, ways): + if ways is not None: + geometries = create_way_geometries(node_coordinates, + ways) + # Convert to GeoDataFrame + way_gdf = create_gdf(ways, geometries) + way_gdf['osm_type'] = "way" + else: + way_gdf = gpd.GeoDataFrame() + return way_gdf + +cpdef prepare_node_gdf(nodes): + if nodes is not None: + # Create GeoDataFrame from nodes + node_gdf = create_nodes_gdf(nodes) + node_gdf['osm_type'] = "node" + else: + node_gdf = gpd.GeoDataFrame() + return node_gdf + +cpdef prepare_relation_gdf(node_coordinates, relations, relation_ways, tags_as_columns): + if relations is not None: + relations = prepare_relations(relations, relation_ways, + node_coordinates, + tags_as_columns) + + relation_gdf = gpd.GeoDataFrame(relations) + relation_gdf['osm_type'] = "relation" + + else: + relation_gdf = gpd.GeoDataFrame() + return relation_gdf + +cpdef prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, + tags_as_columns): + # Prepare nodes + node_gdf = prepare_node_gdf(nodes) + + # Prepare ways + way_gdf = prepare_way_gdf(node_coordinates, ways) + + # Prepare relation data + relation_gdf = prepare_relation_gdf(node_coordinates, relations, relation_ways, tags_as_columns) + + # Merge all + gdf = pd.concat([node_gdf, way_gdf, relation_gdf]) + gdf = gdf.dropna(subset=['geometry']).reset_index(drop=True) + return gdf diff --git a/pyrosm/geometry.pxd b/pyrosm/geometry.pxd index bc9ac40..4280754 100644 --- a/pyrosm/geometry.pxd +++ b/pyrosm/geometry.pxd @@ -1,8 +1,8 @@ cpdef create_point_geometries(xarray, yarray) cdef _create_point_geometries(xarray, yarray) +cdef _create_way_geometries(node_coordinates, way_elements) cpdef create_way_geometries(node_coordinates, way_elements) cdef create_pygeos_polygon_from_relation(node_coordinates, relation_ways, member_roles) -cpdef create_polygon_geometries(node_coordinates, way_elements) cdef create_linear_ring(coordinates) cpdef create_node_coordinates_lookup(nodes) cdef 
pygeos_to_shapely(geom) diff --git a/pyrosm/geometry.pyx b/pyrosm/geometry.pyx index fa5fd46..f5e1ccc 100644 --- a/pyrosm/geometry.pyx +++ b/pyrosm/geometry.pyx @@ -74,34 +74,6 @@ cdef _create_point_geometries(xarray, yarray): dtype=object)) -cdef _create_way_geometries(node_coordinates, way_elements): - cdef long long node - cdef list coords, way_nodes - cdef int i, ii, nn, n = len(way_elements['id']) - - geometries = [] - for i in range(0, n): - way_nodes = way_elements['nodes'][i] - coords = [] - nn = len(way_nodes) - for ii in range(0, nn): - node = way_nodes[ii] - try: - coords.append((node_coordinates[node][0], - node_coordinates[node][1])) - except: - pass - if len(coords) > 1: - geometries.append(coords) - else: - geometries.append(None) - return to_shapely(np.array( - [linestrings(geom) - if geom is not None else None - for geom in geometries], - dtype=object)) - - cdef create_pygeos_polygon_from_relation(node_coordinates, relation_ways, member_roles): cdef int i, m_cnt cdef str role @@ -138,57 +110,98 @@ cdef create_pygeos_polygon_from_relation(node_coordinates, relation_ways, member return polygons(shell, holes) -cdef _create_polygon_geometries(node_coordinates, way_elements): - cdef long long node - cdef list coords - cdef int n = len(way_elements['id']) - cdef int i, ii, nn - geometries = [] +cpdef create_node_coordinates_lookup(nodes): + return _create_node_coordinates_lookup(nodes) - for i in range(0, n): - nodes_ = way_elements['nodes'][i] - coords = [] - nn = len(nodes_) - for ii in range(0, nn): - node = nodes_[ii] - try: - coords.append((node_coordinates[node][0], - node_coordinates[node][1])) - except: - pass +cpdef create_point_geometries(xarray, yarray): + return _create_point_geometries(xarray, yarray) - if len(coords) > 2: - try: - geometries.append(polygons(coords)) - except GEOSException as e: - # Some geometries might not be valid for creating a Polygon - # These might occur e.g. 
at the edge of the spatial extent - if "Invalid number of points in LinearRing" in str(e): - geometries.append(None) - else: - raise e - except Exception as e: + +cdef create_linestring_geometry(nodes, node_coordinates): + + coords = [] + n = len(nodes) + for i in range(0, n): + node = nodes[i] + try: + coords.append((node_coordinates[node][0], + node_coordinates[node][1])) + except: + pass + + if len(coords) > 1: + try: + return linestrings(coords) + except GEOSException as e: + if "Invalid number of points" in str(e): + return None + else: raise e + except Exception as e: + raise e - else: - geometries.append(None) + else: + return None - return to_shapely(geometries) +cdef create_polygon_geometry(nodes, node_coordinates): + cdef int i, n = len(nodes) + coords = [] + for i in range(0, n): + node = nodes[i] + try: + coords.append((node_coordinates[node][0], + node_coordinates[node][1])) + except: + pass + + if len(coords) > 2: + try: + return polygons(coords) + except GEOSException as e: + # Some geometries might not be valid for creating a Polygon + # These might occur e.g. 
at the edge of the spatial extent + if "Invalid number of points in LinearRing" in str(e): + return None + else: + raise e + except Exception as e: + raise e + else: + return None +cdef _create_way_geometries(node_coordinates, way_elements): + # Info for constructing geometries: + # https://wiki.openstreetmap.org/wiki/Way -cpdef create_node_coordinates_lookup(nodes): - return _create_node_coordinates_lookup(nodes) + cdef long long node + cdef list coords + cdef int n = len(way_elements['id']) + cdef int i + geometries = [] -cpdef create_point_geometries(xarray, yarray): - return _create_point_geometries(xarray, yarray) + for i in range(0, n): + nodes = way_elements['nodes'][i] + coords = [] + # If first and last node are the same, it's a closed way + if nodes[0] == nodes[-1]: + tag_keys = way_elements.keys() + # Create Polygon unless way is of type 'highway' or 'barrier' + if "highway" in tag_keys or "barrier" in tag_keys: + geom = create_linestring_geometry(nodes, node_coordinates) + else: + geom = create_polygon_geometry(nodes, node_coordinates) -cpdef create_way_geometries(node_coordinates, way_elements): - return _create_way_geometries(node_coordinates, way_elements) + # Otherwise create LineString + else: + geom = create_linestring_geometry(nodes, node_coordinates) + geometries.append(geom) -cpdef create_polygon_geometries(node_coordinates, way_elements): - return _create_polygon_geometries(node_coordinates, way_elements, ) + return to_shapely(geometries) + +cpdef create_way_geometries(node_coordinates, way_elements): + return _create_way_geometries(node_coordinates, way_elements) \ No newline at end of file diff --git a/pyrosm/landuse.py b/pyrosm/landuse.py new file mode 100644 index 0000000..5693467 --- /dev/null +++ b/pyrosm/landuse.py @@ -0,0 +1,40 @@ +from pyrosm.data_manager import get_osm_data +from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter +import geopandas as gpd +import warnings + + +def 
get_landuse_data(nodes, node_coordinates, way_records, relations, tags_as_columns, custom_filter): + # If custom_filter has not been defined, initialize with default + if custom_filter is None: + custom_filter = {"landuse": True} + else: + # Check that the custom filter is in correct format + validate_custom_filter(custom_filter) + + # Ensure that the "landuse" tag exists + if "landuse" not in custom_filter.keys(): + custom_filter["landuse"] = True + + # Call signature for fetching buildings + nodes, ways, relation_ways, relations = get_osm_data(node_arrays=nodes, + way_records=way_records, + relations=relations, + tags_as_columns=tags_as_columns, + data_filter=custom_filter, + filter_type="keep", + osm_keys=None + ) + + # If there weren't any data, return empty GeoDataFrame + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any landuse elements for given area.", + UserWarning, + stacklevel=2) + return gpd.GeoDataFrame() + + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + return gdf diff --git a/pyrosm/natural.py b/pyrosm/natural.py new file mode 100644 index 0000000..74cbbdc --- /dev/null +++ b/pyrosm/natural.py @@ -0,0 +1,40 @@ +from pyrosm.data_manager import get_osm_data +from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter +import geopandas as gpd +import warnings + + +def get_natural_data(nodes, node_coordinates, way_records, relations, tags_as_columns, custom_filter): + # If custom_filter has not been defined, initialize with default + if custom_filter is None: + custom_filter = {"natural": True} + else: + # Check that the custom filter is in correct format + validate_custom_filter(custom_filter) + + # Ensure that the "landuse" tag exists + if "natural" not in custom_filter.keys(): + custom_filter["natural"] = True + + # Call signature for fetching buildings + nodes, ways, relation_ways, 
relations = get_osm_data(node_arrays=nodes, + way_records=way_records, + relations=relations, + tags_as_columns=tags_as_columns, + data_filter=custom_filter, + filter_type="keep", + osm_keys=None + ) + + # If there weren't any data, return empty GeoDataFrame + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any natural elements for given area.", + UserWarning, + stacklevel=2) + return gpd.GeoDataFrame() + + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + return gdf diff --git a/pyrosm/networks.py b/pyrosm/networks.py index ed68481..95c4bc0 100644 --- a/pyrosm/networks.py +++ b/pyrosm/networks.py @@ -1,6 +1,5 @@ from pyrosm.data_manager import get_osm_data -from pyrosm.frames import create_gdf -from pyrosm.geometry import create_way_geometries +from pyrosm.frames import prepare_geodataframe import geopandas as gpd import warnings @@ -27,11 +26,9 @@ def get_network_data(node_coordinates, way_records, tags_as_columns, network_fil stacklevel=2) return gpd.GeoDataFrame() - geometries = create_way_geometries(node_coordinates, - ways) + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + return gdf - # Convert to GeoDataFrame - gdf = create_gdf(ways, geometries) - gdf = gdf.dropna(subset=['geometry']).reset_index(drop=True) - return gdf diff --git a/pyrosm/pois.py b/pyrosm/pois.py index 545047e..60d82f1 100644 --- a/pyrosm/pois.py +++ b/pyrosm/pois.py @@ -1,13 +1,13 @@ from pyrosm.data_manager import get_osm_data -from pyrosm.geometry import create_polygon_geometries -from pyrosm.frames import create_gdf, create_nodes_gdf -from pyrosm.relations import prepare_relations +from pyrosm.frames import prepare_geodataframe +from pyrosm.utils import validate_custom_filter import geopandas as gpd -import pandas as pd import warnings def get_poi_data(nodes, node_coordinates, 
way_records, relations, tags_as_columns, custom_filter): + # Validate filter + validate_custom_filter(custom_filter) # Call signature for fetching POIs nodes, ways, relation_ways, relations = get_osm_data(node_arrays=nodes, @@ -26,35 +26,8 @@ def get_poi_data(nodes, node_coordinates, way_records, relations, tags_as_column stacklevel=2) return gpd.GeoDataFrame() - if nodes is not None: - # Create GeoDataFrame from nodes - node_gdf = create_nodes_gdf(nodes) - node_gdf['osm_type'] = "node" - else: - node_gdf = gpd.GeoDataFrame() + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) - if ways is not None: - # Create geometries for normal ways - geometries = create_polygon_geometries(node_coordinates, - ways) - # Convert to GeoDataFrame - way_gdf = create_gdf(ways, geometries) - node_gdf['osm_type'] = "way" - else: - way_gdf = gpd.GeoDataFrame() - - # Prepare relation data if it is available - if relations is not None: - relations = prepare_relations(relations, relation_ways, - node_coordinates, - tags_as_columns) - relation_gdf = gpd.GeoDataFrame(relations) - node_gdf['osm_type'] = "relation" - - else: - relation_gdf = gpd.GeoDataFrame() - - # Merge all - gdf = pd.concat([node_gdf, way_gdf, relation_gdf]) - gdf = gdf.dropna(subset=['geometry']) return gdf diff --git a/pyrosm/pyrosm.py b/pyrosm/pyrosm.py index 0246198..8b83bd3 100644 --- a/pyrosm/pyrosm.py +++ b/pyrosm/pyrosm.py @@ -1,13 +1,19 @@ from pyrosm.config import Conf from pyrosm.pbfreader import parse_osm_data -from pyrosm.networks import get_network_data -from pyrosm.buildings import get_building_data from pyrosm._arrays import concatenate_dicts_of_arrays -from pyrosm.pois import get_poi_data from pyrosm.geometry import create_node_coordinates_lookup from pyrosm.frames import create_nodes_gdf +from pyrosm.utils import validate_custom_filter, validate_osm_keys, \ + validate_tags_as_columns, validate_booleans from shapely.geometry 
import Polygon, MultiPolygon +from pyrosm.buildings import get_building_data +from pyrosm.landuse import get_landuse_data +from pyrosm.natural import get_natural_data +from pyrosm.networks import get_network_data +from pyrosm.pois import get_poi_data +from pyrosm.user_defined import get_user_defined_data + class OSM: from pyrosm.utils._compat import PYGEOS_SHAPELY_COMPAT @@ -72,9 +78,9 @@ def _read_pbf(self): def _get_network_filter(self, net_type): possible_filters = [a for a in self.conf.network_filters.__dir__() if "__" not in a] - possible_filters += ["all"] + possible_filters += ["all", "driving+service"] possible_values = ", ".join(possible_filters) - msg = "'net_type' should be one of the following: " + possible_values + msg = "'network_type' should be one of the following: " + possible_values if not isinstance(net_type, str): raise ValueError(msg) @@ -88,6 +94,8 @@ def _get_network_filter(self, net_type): return self.conf.network_filters.walking elif net_type == "driving": return self.conf.network_filters.driving + elif net_type == "driving+service": + return self.conf.network_filters.driving_psv elif net_type == "cycling": return self.conf.network_filters.cycling elif net_type == "all": @@ -141,6 +149,13 @@ def get_buildings(self, custom_filter=None): To keep only specific buildings such as 'residential' and 'retail', you can apply a custom filter which is a Python dictionary with following format: `custom_filter={'building': ['residential', 'retail']}` + + Further info + ------------ + + See OSM documentation for details about the data: + https://wiki.openstreetmap.org/wiki/Key:building + """ # Default tags to keep as columns tags_as_columns = self.conf.tags.building @@ -161,6 +176,98 @@ def get_buildings(self, custom_filter=None): gdf = gdf.drop("nodes", axis=1) return gdf + def get_landuse(self, custom_filter=None): + """ + Parses landuse from OSM. + + Parameters + ---------- + + custom_filter : dict + What kind of landuse to parse, see details below. 
+ + You can opt-in specific elements by using 'custom_filter'. + To keep only specific landuse such as 'construction' and 'industrial', you can apply + a custom filter which is a Python dictionary with following format: + `custom_filter={'landuse': ['construction', 'industrial']}` + + Further info + ------------ + + See OSM documentation for details about the data: + https://wiki.openstreetmap.org/wiki/Key:landuse + """ + + if self._nodes is None or self._way_records is None: + self._read_pbf() + + # Default tags to keep as columns + tags_as_columns = self.conf.tags.landuse + + # If nodes are still in chunks, merge before passing forward + if isinstance(self._nodes, list): + self._nodes = concatenate_dicts_of_arrays(self._nodes) + + gdf = get_landuse_data(self._nodes, + self._node_coordinates, + self._way_records, + self._relations, + tags_as_columns, + custom_filter) + + # Do not keep node information unless specifically asked for + # (they are in a list, and can cause issues when saving the files) + if not self.keep_node_info: + if "nodes" in gdf.columns: + gdf = gdf.drop("nodes", axis=1) + return gdf + + def get_natural(self, custom_filter=None): + """ + Parses natural from OSM. + + Parameters + ---------- + + custom_filter : dict + What kind of natural to parse, see details below. + + You can opt-in specific elements by using 'custom_filter'. 
+ To keep only specific natural such as 'wood' and 'tree', you can apply + a custom filter which is a Python dictionary with following format: + `custom_filter={'natural': ['wood', 'tree']}` + + Further info + ------------ + + See OSM documentation for details about the data: + https://wiki.openstreetmap.org/wiki/Key:natural + """ + + if self._nodes is None or self._way_records is None: + self._read_pbf() + + # Default tags to keep as columns + tags_as_columns = self.conf.tags.natural + + # If nodes are still in chunks, merge before passing forward + if isinstance(self._nodes, list): + self._nodes = concatenate_dicts_of_arrays(self._nodes) + + gdf = get_natural_data(self._nodes, + self._node_coordinates, + self._way_records, + self._relations, + tags_as_columns, + custom_filter) + + # Do not keep node information unless specifically asked for + # (they are in a list, and can cause issues when saving the files) + if not self.keep_node_info: + if "nodes" in gdf.columns: + gdf = gdf.drop("nodes", axis=1) + return gdf + def get_pois(self, custom_filter=None): """ Parse Point of Interest (POI) from OSM. @@ -262,8 +369,106 @@ def get_pois(self, custom_filter=None): gdf = gdf.drop("nodes", axis=1) return gdf - def get_landuse(self, custom_filter=None): - raise NotImplementedError() + def get_osm_by_custom_criteria(self, + custom_filter, + osm_keys_to_keep=None, + filter_type="keep", + tags_as_columns=None, + keep_nodes=True, + keep_ways=True, + keep_relations=True): + """ + Parse OSM data based on custom criteria. + + Parameters + ---------- + + custom_filter : dict (required) + A custom filter to filter only specific POIs from OpenStreetMap. + + osm_keys_to_keep : str | list + A filter to specify which OSM keys should be kept. + + filter_type : str + "keep" | "exclude" + Whether the filters should be used to keep or exclude the data from OSM. + + tags_as_columns : list + Which tags should be kept as columns in the resulting GeoDataFrame. 
+ + keep_nodes : bool + Whether or not the nodes should be kept in the resulting GeoDataFrame if they are found. + + keep_ways : bool + Whether or not the ways should be kept in the resulting GeoDataFrame if they are found. + + keep_relations : bool + Whether or not the relations should be kept in the resulting GeoDataFrame if they are found. + + """ + + # Check that the custom filter is in correct format + validate_custom_filter(custom_filter) + + if not isinstance(filter_type, str): + raise ValueError("'filter_type' -parameter should be either 'keep' or 'exclude'. ") + + # Validate osm keys + validate_osm_keys(osm_keys_to_keep) + if isinstance(osm_keys_to_keep, str): + osm_keys_to_keep = [osm_keys_to_keep] + + # Validate filter + filter_type = filter_type.lower() + if filter_type not in ["keep", "exclude"]: + raise ValueError("'filter_type' -parameter should be either 'keep' or 'exclude'. ") + + # Tags to keep as columns + if tags_as_columns is None: + tags_as_columns = [] + for k in custom_filter.keys(): + try: + tags_as_columns += getattr(self.conf.tags, k) + except Exception as e: + pass + # If tags weren't available in conf, store keys as columns by default + # (all other tags in such cases will be stored in 'tags' column as JSON) + if len(tags_as_columns) == 0: + tags_as_columns = list(custom_filter.keys()) + + else: + # Validate tags + validate_tags_as_columns(tags_as_columns) + + # Validate booleans + validate_booleans(keep_nodes, keep_ways, keep_relations) + + if self._nodes is None or self._way_records is None: + self._read_pbf() + + # If nodes are still in chunks, merge before passing forward + if isinstance(self._nodes, list): + self._nodes = concatenate_dicts_of_arrays(self._nodes) + + gdf = get_user_defined_data(self._nodes, + self._node_coordinates, + self._way_records, + self._relations, + tags_as_columns, + custom_filter, + osm_keys_to_keep, + filter_type, + keep_nodes, + keep_ways, + keep_relations + ) + + # Do not keep node information unless 
specifically asked for + # (they are in a list, and can cause issues when saving the files) + if not self.keep_node_info: + if "nodes" in gdf.columns: + gdf = gdf.drop("nodes", axis=1) + return gdf def __getattribute__(self, name): # If node-gdf is requested convert to gdf before returning diff --git a/pyrosm/user_defined.py b/pyrosm/user_defined.py new file mode 100644 index 0000000..553fde1 --- /dev/null +++ b/pyrosm/user_defined.py @@ -0,0 +1,59 @@ +from pyrosm.data_manager import get_osm_data +from pyrosm.frames import prepare_geodataframe +import geopandas as gpd +import warnings + + +def get_user_defined_data(nodes, + node_coordinates, + way_records, + relations, + tags_as_columns, + custom_filter, + osm_keys, + filter_type, + keep_nodes, + keep_ways, + keep_relations): + + if not keep_nodes: + nodes = None + + # If wanting to parse relations but not ways, + # it is still necessary to parse ways as well at this point + if keep_ways is False and keep_relations is True: + pass + # If ways are not wanted, neither should relations be parsed + elif not keep_ways: + way_records = None + relations = None + + if not keep_relations: + relations = None + + # Call signature for fetching POIs + nodes, ways, relation_ways, relations = get_osm_data(node_arrays=nodes, + way_records=way_records, + relations=relations, + tags_as_columns=tags_as_columns, + data_filter=custom_filter, + filter_type=filter_type, + osm_keys=osm_keys, + ) + + # If there weren't any data, return empty GeoDataFrame + if nodes is None and ways is None and relations is None: + warnings.warn("Could not find any OSM data for given area.", + UserWarning, + stacklevel=2) + return gpd.GeoDataFrame() + + # Ensure that ways are None if returning those are not requested + if not keep_ways: + ways = None + + # Prepare GeoDataFrame + gdf = prepare_geodataframe(nodes, node_coordinates, ways, + relations, relation_ways, tags_as_columns) + + return gdf diff --git a/pyrosm/utils/__init__.py 
def validate_custom_filter(custom_filter):
    """
    Check that ``custom_filter`` has the expected structure.

    Parameters
    ----------
    custom_filter : dict
        Maps OSM keys (str) either to ``True`` or to a list of tag values (str).

    Raises
    ------
    ValueError
        If the filter is not a dict, a key is not a string, a value is neither
        ``True`` nor a list, or a list item is not a string.
    """
    if not isinstance(custom_filter, dict):
        raise ValueError(f"'custom_filter' should be a Python dictionary. "
                         f"Got {custom_filter} with type {type(custom_filter)}.")

    for k, v in custom_filter.items():
        if not isinstance(k, str):
            raise ValueError(f"OSM key in 'custom_filter' should be string. "
                             f"Got {k} of type {type(k)}")

        # Only the literal True is accepted in place of a list of tag values
        if v is True:
            continue

        if not isinstance(v, list):
            raise ValueError(f"OSM tags in 'custom_filter' should be inside a list. "
                             f"Got {v} of type {type(v)}")

        for item in v:
            if not isinstance(item, str):
                raise ValueError(f"OSM tag (value) in 'custom_filter' should be string. "
                                 f"Got {item} of type {type(item)}")


def validate_osm_keys(osm_keys):
    """
    Check that ``osm_keys`` is None, a string or a list.

    Parameters
    ----------
    osm_keys : str | list | None
        Value given for the 'osm_keys_to_keep' parameter.

    Raises
    ------
    ValueError
        If ``osm_keys`` is not None and is neither a str nor a list.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of str and list.
    if osm_keys is not None and not isinstance(osm_keys, (str, list)):
        raise ValueError(f"'osm_keys_to_keep' -parameter should be of type str or list. "
                         f"Got {osm_keys} of type {type(osm_keys)}.")


def validate_tags_as_columns(tags_as_columns):
    """
    Check that ``tags_as_columns`` is a list of strings.

    Parameters
    ----------
    tags_as_columns : list
        Tag names, each of which must be a str.

    Raises
    ------
    ValueError
        If the value is not a list or any item is not a string.
    """
    if not isinstance(tags_as_columns, list):
        raise ValueError(f"'tags_as_columns' should be a list. "
                         f"Got {tags_as_columns} of type {type(tags_as_columns)}.")
    for col in tags_as_columns:
        if not isinstance(col, str):
            raise ValueError(f"All tags listed in 'tags_as_columns' should be strings. "
                             f"Got {col} of type {type(col)}.")


def validate_booleans(keep_nodes, keep_ways, keep_relations):
    """
    Check the three element-selection flags.

    Parameters
    ----------
    keep_nodes, keep_ways, keep_relations : bool
        Flags selecting which OSM element types to keep.

    Raises
    ------
    ValueError
        If any flag is not a bool, or if all three are False.
    """
    # Checked in this order so the first offending parameter is reported
    flags = (("keep_nodes", keep_nodes),
             ("keep_ways", keep_ways),
             ("keep_relations", keep_relations))
    for name, value in flags:
        if not isinstance(value, bool):
            raise ValueError(f"'{name}' should be boolean type: True or False")

    if not (keep_nodes or keep_ways or keep_relations):
        # NOTE: the wording (including "on of") is matched by existing tests,
        # so the message text is intentionally left unchanged.
        raise ValueError("At least on of the following parameters should be True: "
                         "'keep_nodes', 'keep_ways', or 'keep_relations'")
def test_passing_incorrect_custom_filter(test_pbf):
    """A non-dict ``custom_filter`` passed to ``get_buildings`` must raise ValueError."""
    import pytest
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)
    # pytest.raises fails the test when nothing is raised or when the message
    # does not mention "dictionary"; the earlier try/except passed in both cases.
    with pytest.raises(ValueError, match="dictionary"):
        osm.get_buildings(custom_filter="wrong")
def test_using_incorrect_filter(test_pbf):
    """Malformed ``custom_filter`` values must be rejected with a ValueError."""
    from pyrosm import OSM
    osm = OSM(filepath=test_pbf)

    # (invalid filter, text expected in the error message)
    invalid_filters = [
        # 1. Not a dictionary at all
        (None, "should be a Python dictionary"),
        # 2. Tag value is not a string
        ({"building": [1]}, "should be string"),
        # 3. One of several tag values is not a string
        ({"building": ["correct_string", 1]}, "should be string"),
        # 4. OSM key is not a string
        ({0: ["residential"]}, "should be string"),
    ]

    for custom_filter, expected_message in invalid_filters:
        # Unlike a bare try/except, this fails when no exception is raised
        with pytest.raises(ValueError, match=expected_message):
            osm.get_osm_by_custom_criteria(custom_filter=custom_filter)
def test_using_incorrect_osm_keys(test_pbf):
    """A non-str, non-list ``osm_keys_to_keep`` must raise ValueError."""
    from pyrosm import OSM
    osm = OSM(filepath=test_pbf)

    osm_keys = 1
    custom_filter = {"building": ["retail"]}
    # Fails when no exception is raised, which the earlier try/except allowed
    with pytest.raises(ValueError,
                       match="'osm_keys_to_keep' -parameter should be of type str or list"):
        osm.get_osm_by_custom_criteria(custom_filter=custom_filter,
                                       osm_keys_to_keep=osm_keys
                                       )
OSM(filepath=helsinki_pbf) + + # Test getting only relations + # --------------------------- + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': True}, + filter_type="keep", + keep_nodes=False, + keep_ways=False, + keep_relations=True) + assert isinstance(filtered, GeoDataFrame) + + # Now should only have 'relation' osm_type + assert len(filtered['osm_type'].unique()) == 1 + assert filtered['osm_type'].unique()[0] == 'relation' + assert len(filtered) == 64 + + # Test getting only ways + # --------------------------- + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': True}, + filter_type="keep", + keep_nodes=False, + keep_ways=True, + keep_relations=False) + assert isinstance(filtered, GeoDataFrame) + + # Now should only have 'way' osm_type + assert len(filtered['osm_type'].unique()) == 1 + assert filtered['osm_type'].unique()[0] == 'way' + assert len(filtered) == 422 + + # Test getting only nodes + # --------------------------- + filtered = osm.get_osm_by_custom_criteria(custom_filter={'building': True}, + filter_type="keep", + keep_nodes=True, + keep_ways=False, + keep_relations=False) + assert isinstance(filtered, GeoDataFrame) + + # Now should only have 'node' osm_type + assert len(filtered['osm_type'].unique()) == 1 + assert filtered['osm_type'].unique()[0] == 'node' + assert len(filtered) == 36 + + +def test_custom_filters_with_custom_keys(helsinki_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + + # Get first all data + osm = OSM(filepath=helsinki_pbf) + + # Test reading public transport related data + filtered = osm.get_osm_by_custom_criteria(custom_filter={'public_transport': True}, + filter_type="keep", + ) + assert isinstance(filtered, GeoDataFrame) + assert len(filtered) == 112 + + # Test a more complicated query + # ----------------------------- + + # Test reading all transit related data (bus, trains, trams, metro etc.) + # Exclude nodes (not keeping stops, etc.) 
def test_reading_custom_from_area_having_none(helsinki_pbf):
    """A custom query over an area without matching data warns and returns an empty frame."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    # Bounding box for area that does not have any data
    bbox = [24.940514, 60.173849, 24.942, 60.175892]

    osm = OSM(filepath=helsinki_pbf, bounding_box=bbox)

    # The tool should warn if no data were found. `match` verifies the warning
    # text; the earlier `if ... in str(w): pass` check could never fail and
    # looked for a lowercase "could" that the message does not contain.
    with pytest.warns(UserWarning, match="Could not find any OSM data"):
        gdf = osm.get_osm_by_custom_criteria({"highway": ["primary"]})

    # Result should be empty GeoDataFrame
    assert isinstance(gdf, GeoDataFrame)
    assert gdf.shape == (0, 0)
def test_passing_incorrect_custom_filter(test_pbf):
    """A non-dict ``custom_filter`` passed to ``get_landuse`` must raise ValueError."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)
    # pytest.raises fails the test when nothing is raised or when the message
    # does not mention "dictionary"; the earlier try/except passed in both cases.
    with pytest.raises(ValueError, match="dictionary"):
        osm.get_landuse(custom_filter="wrong")
def test_passing_incorrect_filepath():
    """Constructing ``OSM`` with a non-path value must raise ValueError."""
    from pyrosm import OSM

    # The earlier try/except also passed when OSM(11) raised nothing at all
    with pytest.raises(ValueError):
        OSM(11)
def test_passing_incorrect_custom_filter(test_pbf):
    """A non-dict ``custom_filter`` passed to ``get_natural`` must raise ValueError."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)
    # pytest.raises fails the test when nothing is raised or when the message
    # does not mention "dictionary"; the earlier try/except passed in both cases.
    with pytest.raises(ValueError, match="dictionary"):
        osm.get_natural(custom_filter="wrong")
diff --git a/tests/test_network_parsing.py b/tests/test_network_parsing.py index 1b6afe7..4f52fba 100644 --- a/tests/test_network_parsing.py +++ b/tests/test_network_parsing.py @@ -31,11 +31,11 @@ def test_filter_network_by_walking(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test shape - assert gdf.shape == (238, 17) + assert gdf.shape == (238, 18) required_cols = ['access', 'bridge', 'foot', 'highway', 'lanes', 'lit', 'maxspeed', 'name', 'oneway', 'ref', 'service', 'surface', 'id', - 'geometry', 'tags'] + 'geometry', 'tags', 'osm_type'] for col in required_cols: assert col in gdf.columns @@ -54,10 +54,35 @@ def test_filter_network_by_driving(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test shape - assert gdf.shape == (200, 17) + assert gdf.shape == (200, 18) required_cols = ['access', 'bridge', 'highway', 'int_ref', 'lanes', 'lit', 'maxspeed', - 'name', 'oneway', 'ref', 'service', 'surface', 'id', 'geometry', 'tags'] + 'name', 'oneway', 'ref', 'service', 'surface', 'id', 'geometry', 'tags', + 'osm_type'] + for col in required_cols: + assert col in gdf.columns + + # Should not include 'footway' or 'path' ways by default + assert "footway" not in gdf["highway"].unique() + assert "path" not in gdf["highway"].unique() + + +def test_filter_network_by_driving_with_service_roads(test_pbf): + from pyrosm import OSM + from geopandas import GeoDataFrame + from shapely.geometry import LineString + osm = OSM(filepath=test_pbf) + gdf = osm.get_network(network_type="driving+service") + + assert isinstance(gdf.loc[0, 'geometry'], LineString) + assert isinstance(gdf, GeoDataFrame) + + # Test shape + assert gdf.shape == (200, 18) + + required_cols = ['access', 'bridge', 'highway', 'int_ref', 'lanes', 'lit', 'maxspeed', + 'name', 'oneway', 'ref', 'service', 'surface', 'id', 'geometry', 'tags', + 'osm_type'] for col in required_cols: assert col in gdf.columns @@ -77,11 +102,11 @@ def test_filter_network_by_cycling(test_pbf): assert isinstance(gdf, GeoDataFrame) # Test 
def test_filter_network_by_all(test_pbf):
    """Read the 'all' network type and check geometry type, frame shape and columns."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame
    from shapely.geometry import LineString

    network = OSM(filepath=test_pbf).get_network(network_type="all")

    first_geometry = network.loc[0, 'geometry']
    assert isinstance(first_geometry, LineString)
    assert isinstance(network, GeoDataFrame)

    # Expected number of rows and columns
    assert network.shape == (331, 21)

    expected_columns = (
        'access', 'bicycle', 'bridge', 'foot', 'highway', 'lanes', 'lit',
        'maxspeed', 'name', 'oneway', 'ref', 'service', 'surface', 'tunnel',
        'id', 'geometry', 'tags', 'osm_type',
    )
    for column in expected_columns:
        assert column in network.columns
def test_passing_incorrect_net_type(test_pbf):
    """An unknown or non-string ``network_type`` must raise ValueError."""
    import pytest
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)
    expected_message = "'network_type' should be one of the following"

    # An unknown name and a non-string value are both expected to be rejected.
    # pytest.raises fails when no exception is raised, which the earlier
    # try/except silently allowed.
    for wrong_type in ("wrong_network", 42):
        with pytest.raises(ValueError, match=expected_message):
            osm.get_network(wrong_type)
def test_passing_incorrect_custom_filter(test_pbf):
    """A non-dict ``custom_filter`` passed to ``get_pois`` must raise ValueError."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)
    # pytest.raises fails the test when nothing is raised or when the message
    # does not mention "dictionary"; the earlier try/except passed in both cases.
    with pytest.raises(ValueError, match="dictionary"):
        osm.get_pois(custom_filter="wrong")