# Filter country polygons from division areas

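Scan the `type=division_area` GeoParquet files, keep the features marked as land countries, and export them to `data/results/country_area.parquet`. The second cell applies the same filter to land regions and exports them to `data/results/region_area.parquet`.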

In [None]:
from pathlib import Path

import duckdb

# Resolve input and output locations relative to the repository root, which is
# taken to be two directory levels above the current working directory.
repo_root = Path.cwd().resolve().parents[1]
division_area_dir = (
    repo_root
    / 'gis_data'
    / 'overturemaps-us-west-2'
    / 'release'
    / '2025-08-20.1'
    / 'theme=divisions'
    / 'type=division_area'
)
# read_parquet() receives a glob so every parquet part file in the directory is scanned.
source_pattern = str(division_area_dir / '*.parquet')
output_path = repo_root / 'data' / 'results' / 'country_area.parquet'
output_path.parent.mkdir(parents=True, exist_ok=True)

# Use the connection as a context manager so it is closed even when the query
# fails (e.g. no files match the glob); a trailing con.close() would be skipped.
with duckdb.connect(database=':memory:') as con:
    # Keep rows whose subtype is 'country' and whose is_land flag is true,
    # ordered by id so the exported file has a deterministic row order.
    country_df = con.execute(
        '''
        SELECT *
        FROM read_parquet(?)
        WHERE subtype = 'country' AND is_land
        ORDER BY id
        ''',
        [source_pattern],
    ).fetchdf()

# Persist the filtered rows without the pandas index and report the row count.
country_df.to_parquet(output_path, index=False)
print(f'Retrieved {len(country_df)} country divisions and saved to {output_path}')


In [None]:
from pathlib import Path

import duckdb

# Resolve input and output locations relative to the repository root, which is
# taken to be two directory levels above the current working directory.
repo_root = Path.cwd().resolve().parents[1]
division_area_dir = (
    repo_root
    / 'gis_data'
    / 'overturemaps-us-west-2'
    / 'release'
    / '2025-08-20.1'
    / 'theme=divisions'
    / 'type=division_area'
)
# read_parquet() receives a glob so every parquet part file in the directory is scanned.
source_pattern = str(division_area_dir / '*.parquet')
output_path = repo_root / 'data' / 'results' / 'region_area.parquet'
output_path.parent.mkdir(parents=True, exist_ok=True)

# Use the connection as a context manager so it is closed even when the query
# fails (e.g. no files match the glob); a trailing con.close() would be skipped.
with duckdb.connect(database=':memory:') as con:
    # Keep rows whose subtype is 'region' and whose is_land flag is true,
    # ordered by id so the exported file has a deterministic row order.
    region_df = con.execute(
        '''
        SELECT *
        FROM read_parquet(?)
        WHERE subtype = 'region' AND is_land
        ORDER BY id
        ''',
        [source_pattern],
    ).fetchdf()

# Persist the filtered rows without the pandas index and report the row count.
region_df.to_parquet(output_path, index=False)
print(f'Retrieved {len(region_df)} region divisions and saved to {output_path}')
