In [1]:
import geopandas as gpd
import osmium
from shapely.geometry import shape

# EPSG code of WGS84 (EPSG:4326, geographic longitude/latitude in degrees).
# Used below as the CRS of the node and way GeoDataFrames.
WGS84 = 4326

In [2]:
# Facility categories to extract, keyed by the name that ends up in each
# feature's "facility" property. Every value is a list of (tag key, tag value)
# pairs; an element belongs to the category when any one of its pairs matches
# the element's tags.
facilities = {
    "bank": [("amenity", "bank")],
    "bar": [("amenity", "bar")],
    "concert_hall": [("amenity", "concert_hall")],
    "hospital": [("amenity", "hospital")],
    "park": [("leisure", "park")],
    "pharmacy": [("amenity", "pharmacy")],
    "school": [("amenity", "school")],
    "sports_field": [("leisure", "pitch")],
}


def way_filter(tags):
    """Tell whether `tags` matches at least one pair listed in `facilities`.

    `tags` only needs to support `key in tags` and `tags[key]`. A
    (key, value) pair matches when the key is present and maps to exactly
    that value. Returns True on the first match, False when nothing matches.
    """
    wanted_pairs = (
        pair
        for pair_list in facilities.values()
        for pair in pair_list
    )
    return any(
        pair[0] in tags and tags[pair[0]] == pair[1]
        for pair in wanted_pairs
    )


# Nodes are selected with exactly the same rule as ways.
node_filter = way_filter


def facility_from_tags(tags):
    """Return the name of the first facility category that `tags` matches.

    Categories are tried in the iteration order of `facilities`; a category
    matches as soon as one of its (key, value) pairs is found in `tags`.
    Returns None when no category matches.
    """
    for name, pairs in facilities.items():
        for tag_key, tag_value in pairs:
            if tag_key in tags and tags[tag_key] == tag_value:
                return name
    return None

Note: the parser cells below may need to be run more than once before they produce results. The cause is unknown (possibly some kind of caching?) and still needs to be investigated.

In [3]:
# GeoJSON-like Point features collected by NodeParser. Reset on every run of
# this cell so re-running it does not duplicate entries.
nodes = []


class NodeParser(osmium.SimpleHandler):
    """Collect facility nodes from an OSM file into the module-level `nodes` list.

    Each node accepted by `node_filter` is appended as a dict with a
    "geometry" entry (a GeoJSON-style Point in (lon, lat) order) and a
    "properties" entry holding the node's "osm_id" and "facility" name.
    """

    def __init__(self):
        super().__init__()

    def node(self, n):
        """Handler callback invoked for every node; keeps only facility nodes."""
        # Use the node-specific hook so node and way selection can diverge
        # later without touching this handler.
        if not node_filter(n.tags):
            return

        # Store plain Python values only, never references to `n` itself.
        location = n.location
        point = {
            "type": "Point",
            "coordinates": (float(location.lon), float(location.lat)),
        }

        nodes.append(
            {
                "geometry": point,
                "properties": {
                    "osm_id": int(n.id),
                    "facility": facility_from_tags(n.tags),
                },
            }
        )


node_parser = NodeParser()
node_parser.apply_file("../artifacts/seattle.osm.pbf")

In [4]:
# GeoJSON-like Polygon features collected by WayParser. Reset on every run of
# this cell so re-running it does not duplicate entries.
ways = []


class WayParser(osmium.SimpleHandler):
    """Collect closed facility ways from an OSM file into the module-level `ways` list.

    Each way accepted by `way_filter` whose node coordinates form a closed
    ring is appended as a dict with a "geometry" entry (a GeoJSON-style
    Polygon with a single exterior ring in (lon, lat) order) and a
    "properties" entry holding the way's "osm_id" and "facility" name.
    """

    # A closed ring needs at least four positions: three distinct corners
    # plus the repeated first point.
    MIN_RING_POINTS = 4

    def __init__(self):
        super().__init__()

    def way(self, w):
        """Handler callback invoked for every way; keeps only closed facility ways."""
        # FIXME: this will miss multipolygons. Need a relation hook.
        if not way_filter(w.tags):
            return

        # Reading n.lon / n.lat needs node locations on the way, which is why
        # the file is applied with locations=True below.
        coords = [(n.lon, n.lat) for n in w.nodes]

        # Reject anything that cannot be a polygon ring: too few points
        # (including an empty node list) or end points that differ in EITHER
        # coordinate. Comparing the tuples covers both lon and lat.
        if len(coords) < self.MIN_RING_POINTS or coords[0] != coords[-1]:
            return

        ways.append(
            {
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [coords],
                },
                "properties": {
                    "osm_id": int(w.id),
                    "facility": facility_from_tags(w.tags),
                },
            }
        )


way_parser = WayParser()
way_parser.apply_file("../artifacts/seattle.osm.pbf", locations=True)

In [5]:
# Report how many features each parser collected, one count per line.
print(
    f"Nodes: {len(nodes)}",
    f"Ways: {len(ways)}",
    sep="\n",
)

Nodes: 552
Ways: 1649


In [6]:
# Flatten every collected feature into one record: its properties plus a
# "geometry" entry holding the shapely object built from the GeoJSON-like dict.
nodes_shapes = [
    dict(feature["properties"], geometry=shape(feature["geometry"]))
    for feature in nodes
]
ways_shapes = [
    dict(feature["properties"], geometry=shape(feature["geometry"]))
    for feature in ways
]

In [7]:
# Build one GeoDataFrame per feature kind, declaring the coordinates as WGS84
# at construction time (this labels the CRS; nothing is reprojected).
nodes_df = gpd.GeoDataFrame(nodes_shapes, crs=WGS84)
ways_df = gpd.GeoDataFrame(ways_shapes, crs=WGS84)

In [8]:
# Write each frame to its own GeoPackage next to the source extract.
for frame, target in (
    (nodes_df, "../artifacts/seattle_nodes.gpkg"),
    (ways_df, "../artifacts/seattle_ways.gpkg"),
):
    frame.to_file(target, driver="GPKG")