In [1]:
import pandas as pd
import geopandas as gpd
import osmium
import shapely.geometry as sgeo
import os

In [2]:
# Resolve the input/output paths and the zoning attribute. When the notebook
# is driven by Snakemake, a `snakemake` object is present in the namespace and
# supplies all values; otherwise fall back to hard-coded development defaults.
if "snakemake" not in locals():
    # Standalone (interactive) execution: development defaults.
    # NOTE(review): the OSM file name refers to Ile-de-France while every
    # other default path is under "brussels" -- confirm this is intended.
    input_osm_path = "../../../resources/brussels/osm/ile-de-france-latest.osm.pbf"
    input_zones_path = "../../../results/brussels/census/spatial.parquet"

    output_path = "../../../results/brussels/osm/locations.parquet"

    zone_attribute = "municipality_id"
else:
    # Pipeline execution: everything comes from the Snakemake rule.
    input_osm_path = snakemake.input["osm"]
    input_zones_path = snakemake.input["zones"]

    output_path = snakemake.output[0]

    # Name of the column that identifies a zone in the zoning data set.
    zone_attribute = snakemake.params["zone_attribute"]

In [None]:
# Prepare zoning system: load the zones, merge all geometries that share the
# same value of `zone_attribute`, and keep only the identifier and the geometry.
zone_columns = [zone_attribute, "geometry"]

df_zones = (
    gpd.read_parquet(input_zones_path)
    .dissolve(zone_attribute)
    .reset_index()  # the dissolve key becomes the index; turn it back into a column
)[zone_columns]

In [None]:
# Prepare reading of the OSM data: keep only objects that carry a "building"
# key, resolve node locations, and attach a __geo_interface__ to each object.
building_reader = (
    osmium.FileProcessor(input_osm_path)
    .with_filter(osmium.filter.KeyFilter("building"))
    .with_locations()
    .with_filter(osmium.filter.GeoInterfaceFilter())
)

# Obtain centroids of the buildings as point geometries. The centroid is taken
# on the WGS84 (EPSG:4326) coordinates delivered by OSM, before reprojection.
building_centroids = [
    sgeo.shape(osm_object.__geo_interface__["geometry"]).centroid
    for osm_object in building_reader
]

# Wrap the points in a GeoDataFrame and reproject to the CRS of the zones so
# that both layers can be joined spatially.
df_buildings = gpd.GeoDataFrame(
    pd.DataFrame({"geometry": building_centroids}),
    crs = "EPSG:4326",
)
df_buildings = df_buildings.to_crs(df_zones.crs)

In [None]:
# Map buildings to zones: inner spatial join that keeps every building
# centroid lying within a zone and attaches that zone's attributes to it.
df_buildings = gpd.sjoin(
    left_df = df_buildings,
    right_df = df_zones,
    how = "inner",
    predicate = "within",
)

In [None]:
# Output
# Snakemake creates the parent directories of declared outputs itself, but a
# standalone run of the notebook does not: make sure the target directory
# exists so that writing the file cannot fail on a fresh checkout.
output_directory = os.path.dirname(output_path)
if output_directory:  # empty when output_path is a bare file name
    os.makedirs(output_directory, exist_ok = True)

# Write the buildings with their assigned zone as a (Geo)Parquet file.
df_buildings.to_parquet(output_path)