## Create GeoParquet files

In [2]:
import duckdb

# Open an in-memory DuckDB database; `con` is the handle the later cells run SQL through.
con = duckdb.connect(database=':memory:')

# Each extension is installed and then loaded into this session.
# `execute` hands back the connection, so the two statements can be chained.
con.execute("INSTALL spatial;").execute("LOAD spatial;")

# httpfs is set up the same way (presumably for the s3:// read done by the export query).
con.execute("INSTALL httpfs;").execute("LOAD httpfs;")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x211697d9730>

### Downloading data from large GeoParquet files to a FlatGeobuf file

In [15]:

# Set the AWS S3 region to match the 'overturemaps-us-west-2' bucket read below
con.execute("SET s3_region='us-west-2';")

# Export parameters -- edit these to pull a different release, category or area
# instead of hand-editing the SQL text.
# categories.primary schema: https://docs.overturemaps.org/schema/concepts/by-theme/places/
OVERTURE_RELEASE = '2025-02-19.0'
PLACE_CATEGORY = 'pizza_restaurant'   # e.g. 'brewery'
X_MIN, X_MAX = 8.0, 9.5               # allowed range for bbox.xmin
Y_MIN, Y_MAX = 58.0, 59.0             # allowed range for bbox.ymin
OUTPUT_FGB = 'agder_pizza.fgb'        # the conversion cell reads this same file name

# The query selects a few attributes of the matching places and writes them to a
# FlatGeobuf file via COPY ... TO with the GDAL writer. Only the named columns are
# selected, so the reader is not asked for the extra per-row `filename` column.
query = f"""
COPY(                                       -- COPY <query> TO <output> saves the results to disk.
    SELECT
       id,
       names.primary as name,
       confidence,
       CAST(socials AS JSON) as socials,    -- Ensure each attribute can be serialized to JSON
       geometry                             -- DuckDB understands this to be a geometry type
    FROM read_parquet('s3://overturemaps-us-west-2/release/{OVERTURE_RELEASE}/theme=places/type=place/*', hive_partitioning=1)
    WHERE categories.primary = '{PLACE_CATEGORY}'
    AND bbox.xmin BETWEEN {X_MIN} AND {X_MAX}
    AND bbox.ymin BETWEEN {Y_MIN} AND {Y_MAX}

) TO '{OUTPUT_FGB}' WITH (FORMAT GDAL, DRIVER 'flatgeobuf', SRS 'EPSG:4326');
"""

# Last expression of the cell: runs the export and returns the connection.
con.execute(query)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x211697d9730>

### Converting the FlatGeobuf file to GeoParquet using GeoPandas

In [16]:
import geopandas as gpd
# Read the FlatGeobuf file and tag it with the EPSG:4326 CRS in one step,
# rebinding `gdf` to the result instead of mutating the frame in place.
fileoriginal = './agder_pizza.fgb'
gdf = gpd.read_file(fileoriginal).set_crs('EPSG:4326')

# Write the GeoDataFrame to Parquet: snappy compression, WKB-encoded geometry,
# and the covering-bbox option switched on.
gdf.to_parquet(
    './agder_pizza.parquet',
    compression='snappy',
    geometry_encoding='WKB',
    write_covering_bbox=True,
)


### Visualizing the resulting GeoParquet data (GeoPandas GeoDataFrame)

In [17]:
# Visualize the GeoDataFrame; as the cell's last expression, the value returned by
# explore() is what the notebook displays.
gdf.explore()