In [1]:
%pip install osmium

Collecting osmium
  Using cached osmium-4.0.2-cp313-cp313-macosx_11_0_arm64.whl.metadata (3.5 kB)
Collecting requests (from osmium)
  Downloading requests-2.32.4-py3-none-any.whl.metadata (4.9 kB)
Collecting charset_normalizer<4,>=2 (from requests->osmium)
  Using cached charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests->osmium)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests->osmium)
  Using cached urllib3-2.4.0-py3-none-any.whl.metadata (6.5 kB)
Using cached osmium-4.0.2-cp313-cp313-macosx_11_0_arm64.whl (1.1 MB)
Downloading requests-2.32.4-py3-none-any.whl (64 kB)
Using cached charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl (199 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Using cached urllib3-2.4.0-py3-none-any.whl (128 kB)
Installing collected packages: urllib3, idna, charset_normalizer, requests, osmium
[2K   [90m━━━━━━━━━━━━━━━━

In [None]:
import osmium
import json
from shapely.geometry import Point, LineString, Polygon, MultiPolygon
from shapely.wkb import loads as wkb_loads # For converting WKB to Shapely

# --- 1. Define your POI tag criteria ---
POI_KEYS = ['amenity', 'shop', 'tourism', 'leisure']

# --- 2. Create a pyosmium Handler ---
class POIHandler(osmium.SimpleHandler):
    def __init__(self, output_geojson_path):
        super(POIHandler, self).__init__()
        self.output_file = open(output_geojson_path, 'w')
        self.output_file.write('{"type": "FeatureCollection", "features": [\n')
        self.first_feature = True
        self.wkbfab = osmium.geom.WKBFactory() # For converting Osmium geometries to WKB

    def _write_feature(self, osm_id, osm_type, tags, geometry):
        """Helper to write a GeoJSON feature to the output file."""
        if geometry is None:
            return

        # Handle different geometry types for GeoJSON
        if isinstance(geometry, Point):
            geom_dict = {"type": "Point", "coordinates": [geometry.x, geometry.y]}
        elif isinstance(geometry, LineString):
            geom_dict = {"type": "LineString", "coordinates": list(geometry.coords)}
        elif isinstance(geometry, Polygon):
            geom_dict = {
                "type": "Polygon",
                "coordinates": [list(geometry.exterior.coords)] + [list(interior.coords) for interior in geometry.interiors]
            }
        elif isinstance(geometry, MultiPolygon):
            geom_dict = {
                "type": "MultiPolygon",
                "coordinates": [
                    [list(poly.exterior.coords)] + [list(inner.coords) for inner in poly.interiors]
                    for poly in geometry.geoms
                ]
            }
        else:
            # This can happen if a way is an area but fails to form a valid polygon
            # print(f"Warning: Unsupported or invalid geometry type for ID {osm_id}, type {osm_type}: {type(geometry)}")
            return

        feature = {
            "type": "Feature",
            "geometry": geom_dict,
            "properties": {
                "osm_id": osm_id,
                "osm_type": osm_type,
                **dict(tags) # Convert Osmium tags to a regular Python dict
            }
        }
        if not self.first_feature:
            self.output_file.write(',\n')
        json.dump(feature, self.output_file, ensure_ascii=False)
        self.first_feature = False

    def _has_poi_tag(self, osm_object):
        """Checks if an OSM object has any of the defined POI keys."""
        for key in POI_KEYS:
            if key in osm_object.tags:
                return True
        return False

    def node(self, n):
        if self._has_poi_tag(n):
            try:
                point = Point(n.location.lon, n.location.lat)
                self._write_feature(n.id, 'node', n.tags, point)
            except osmium.InvalidLocationError:
                pass

    def way(self, w):
        if self._has_poi_tag(w):
            try:
                if w.is_area():
                    # For closed ways (areas), create a Polygon
                    geom = self.wkbfab.create_polygon(w)
                    shapely_geom = wkb_loads(geom.wkb)
                    self._write_feature(w.id, 'way_area', w.tags, shapely_geom)
                else:
                    # For open ways (lines), create a LineString (e.g., a leisure=track)
                    geom = self.wkbfab.create_linestring(w)
                    shapely_geom = wkb_loads(geom.wkb)
                    self._write_feature(w.id, 'way_line', w.tags, shapely_geom)
            except (osmium.InvalidLocationError, RuntimeError) as e:
                # Catch specific Osmium geometry errors
                # print(f"Error processing way {w.id}: {e}")
                pass

    def area(self, a):
        if a.from_way():
            # This area originated from a closed way.
            # We already handle 'way_area' in the way() method,
            # so we can potentially skip this for simple closed ways to avoid duplicates,
            # unless you specifically want to handle all areas here.
            # For this setup, we'll keep it as is, and rely on the has_poi_tag check.
            pass # Keep this for now, if you want all areas to be processed here

        # This will cover areas formed by relations or any areas not already handled by way()
        if self._has_poi_tag(a):
            try:
                # Create multipolygon geometry for relations
                geom = self.wkbfab.create_multipolygon(a)
                shapely_geom = wkb_loads(geom.wkb)
                # Differentiate between way_area and relation_area for clarity
                # if a.from_way() is True here, it means it's a closed way also handled as an area
                # You might want to assign osm_type based on whether it's from_way() or not
                self._write_feature(a.orig_id(), 'area', a.tags, shapely_geom) # 'area' covers both
            except (osmium.InvalidLocationError, RuntimeError) as e:
                # print(f"Error processing area from relation {a.orig_id()}: {e}")
                pass

    def close(self):
        self.output_file.write('\n]}')
        self.output_file.close()


# --- 3. Execution ---
input_pbf_file = "germany_pois_filtered.osm.pbf" # Make sure this file exists from osmium-tool pre-filtering
output_geojson_file = "germany_all_pois.geojson"

print(f"Starting POI extraction from filtered file: {input_pbf_file}...")

# Instantiate your main handler
poi_handler = POIHandler(output_geojson_file)

# Instantiate the NodeLocationIndex handler for location caching
# 'dense_mmap_array' is a good default for large files, but 'sparse_mem_array' or 'sparse_file_array'
# might be needed if you run into memory issues even with filtered data.
# For Germany, even filtered, this could still be substantial.
location_handler = osmium.make_location_handler('dense_mmap_array') # CORRECTED

# Instantiate the Area handler
# This handler is responsible for assembling areas (multipolygons and closed ways)
area_handler = osmium.Area(poi_handler, idx='sparse_mem_array') # Pass your POIHandler to Area

try:
    # Pass all handlers to osmium.apply() as separate arguments
    # The order generally matters: location handler first, then area handler (which depends on location data),
    # then your custom handler (which depends on both).
    osmium.apply(input_pbf_file, location_handler, area_handler, poi_handler)
finally:
    poi_handler.close() # Ensure the GeoJSON file is properly closed

print(f"POI extraction complete. Results saved to {output_geojson_file}")
print("You can now open 'germany_all_pois.geojson' in a GIS software (like QGIS) or process it further with Python libraries.")

Starting POI extraction from filtered file: germany_pois_filtered.osm.pbf...


AttributeError: module 'osmium' has no attribute 'NodeLocationIndex'