# Inspect Overture divisions GeoParquet files

This notebook scans the `theme=divisions` release folder for GeoParquet files, reports their schema, and shows a handful of sample rows using DuckDB. Each subdirectory is sampled once because the files within share the same structure.

In [1]:
from pathlib import Path
from collections import defaultdict

import duckdb
from IPython.display import display

repo_root = Path.cwd().resolve().parents[1]
base_path = repo_root / 'gis_data' / 'overturemaps-us-west-2' / 'release' / '2025-08-20.1' / 'theme=divisions'
print(f'Base directory: {base_path}')

parquet_files = sorted(base_path.rglob('*.parquet'))
if not parquet_files:
    raise FileNotFoundError('No GeoParquet files found under the divisions theme directory.')

files_by_dir = defaultdict(list)
for file_path in parquet_files:
    files_by_dir[file_path.parent].append(file_path)

print(f'Found {len(parquet_files)} parquet files across {len(files_by_dir)} directories.')
for directory, files in sorted(files_by_dir.items()):
    rel_dir = directory.relative_to(base_path)
    print(f'{rel_dir}: {len(files)} file(s)')


Base directory: /workspaces/micromamba_cuda/gis_data/overturemaps-us-west-2/release/2025-08-20.1/theme=divisions
Found 6 parquet files across 3 directories.
type=division: 1 file(s)
type=division_area: 4 file(s)
type=division_boundary: 1 file(s)


In [2]:
con = duckdb.connect(database=':memory:')

for directory, files in sorted(files_by_dir.items()):
    sample_file = files[0]
    rel_dir = directory.relative_to(base_path)
    print(f"\n=== {rel_dir} ===")
    print(f"Sample file: {sample_file.name}")
    schema_df = con.execute(
        "DESCRIBE SELECT * FROM read_parquet(?)", [str(sample_file)]
    ).fetchdf()
    display(schema_df)
    sample_rows_df = con.execute(
        "SELECT * FROM read_parquet(?) LIMIT 5", [str(sample_file)]
    ).fetchdf()
    display(sample_rows_df)



=== type=division ===
Sample file: part-00000-818ae406-f3a4-4c19-8c52-296da235dae9-c000.zstd.parquet


Unnamed: 0,column_name,column_type,null,key,default,extra
0,id,VARCHAR,YES,,,
1,geometry,BLOB,YES,,,
2,bbox,"STRUCT(xmin FLOAT, xmax FLOAT, ymin FLOAT, yma...",YES,,,
3,country,VARCHAR,YES,,,
4,version,INTEGER,YES,,,
5,sources,"STRUCT(property VARCHAR, dataset VARCHAR, reco...",YES,,,
6,cartography,"STRUCT(prominence INTEGER, min_zoom INTEGER, m...",YES,,,
7,subtype,VARCHAR,YES,,,
8,class,VARCHAR,YES,,,
9,names,"STRUCT(""primary"" VARCHAR, common MAP(VARCHAR, ...",YES,,,


Unnamed: 0,id,geometry,bbox,country,version,sources,cartography,subtype,class,names,...,perspectives,local_type,hierarchies,parent_division_id,norms,population,capital_division_ids,capital_of_divisions,theme,type
0,23e81262-d6ed-45a3-a1a0-4bc6a2a887d8,"[0, 0, 0, 0, 1, 192, 97, 104, 186, 199, 16, 20...","{'xmin': -139.27281188964844, 'xmax': -139.272...",,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...","{'prominence': 20, 'min_zoom': None, 'max_zoom...",locality,village,"{'primary': 'Amundsen–Scott South Pole', 'comm...",...,,{'en': 'village'},[[{'division_id': '23e81262-d6ed-45a3-a1a0-4bc...,,,49.0,,,divisions,division
1,3e5d821e-75ab-4484-aa14-8e0a95886a79,"[0, 0, 0, 0, 1, 192, 102, 17, 243, 238, 229, 3...","{'xmin': -176.56105041503906, 'xmax': -176.561...",NZ,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...","{'prominence': 24, 'min_zoom': None, 'max_zoom...",locality,village,"{'primary': 'Waitangi', 'common': {'mi': 'Wait...",...,,{'en': 'village'},[[{'division_id': '89e73df8-d5ab-4156-9e83-140...,7dace4fd-6acc-440f-af3f-9286b4e92adc,,,,[{'division_id': 'c8cb5c2a-b62c-481f-b170-0124...,divisions,division
2,eb1346d6-7d40-4224-af92-76c9807996c2,"[0, 0, 0, 0, 1, 192, 102, 11, 132, 81, 50, 248...","{'xmin': -176.3599090576172, 'xmax': -176.3598...",NZ,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...","{'prominence': 12, 'min_zoom': None, 'max_zoom...",locality,village,"{'primary': 'Owenga', 'common': {'mi': 'Owenga...",...,,{'en': 'village'},[[{'division_id': '89e73df8-d5ab-4156-9e83-140...,7dace4fd-6acc-440f-af3f-9286b4e92adc,,,,,divisions,division
3,7dace4fd-6acc-440f-af3f-9286b4e92adc,"[0, 0, 0, 0, 1, 192, 102, 12, 78, 162, 132, 23...","{'xmin': -176.38461303710938, 'xmax': -176.384...",NZ,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",,county,,"{'primary': 'Chatham Islands Territory', 'comm...",...,,{'en': 'district'},[[{'division_id': '89e73df8-d5ab-4156-9e83-140...,c8cb5c2a-b62c-481f-b170-0124af43d8b4,,,,,divisions,division
4,c8cb5c2a-b62c-481f-b170-0124af43d8b4,"[0, 0, 0, 0, 1, 192, 102, 5, 254, 176, 116, 16...","{'xmin': -176.18734741210938, 'xmax': -176.187...",NZ,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",,region,,"{'primary': 'Chatham Islands', 'common': {'bs'...",...,,{'en': 'region'},[[{'division_id': '89e73df8-d5ab-4156-9e83-140...,89e73df8-d5ab-4156-9e83-140d1ee694c5,,,[3e5d821e-75ab-4484-aa14-8e0a95886a79],,divisions,division



=== type=division_area ===
Sample file: part-00000-c998b093-fa14-440c-98f0-bbdb2126ed22-c000.zstd.parquet


Unnamed: 0,column_name,column_type,null,key,default,extra
0,id,VARCHAR,YES,,,
1,geometry,BLOB,YES,,,
2,bbox,"STRUCT(xmin FLOAT, xmax FLOAT, ymin FLOAT, yma...",YES,,,
3,country,VARCHAR,YES,,,
4,version,INTEGER,YES,,,
5,sources,"STRUCT(property VARCHAR, dataset VARCHAR, reco...",YES,,,
6,subtype,VARCHAR,YES,,,
7,class,VARCHAR,YES,,,
8,names,"STRUCT(""primary"" VARCHAR, common MAP(VARCHAR, ...",YES,,,
9,is_land,BOOLEAN,YES,,,


Unnamed: 0,id,geometry,bbox,country,version,sources,subtype,class,names,is_land,is_territorial,region,division_id,theme,type
0,e788809d-f32e-43e6-a3d3-890942cad6c4,"[0, 0, 0, 0, 6, 0, 0, 0, 13, 0, 0, 0, 0, 3, 0,...","{'xmin': -176.89370727539062, 'xmax': -175.831...",NZ,2,"[{'property': '', 'dataset': 'OpenStreetMap', ...",region,land,"{'primary': 'Chatham Islands', 'common': {'bs'...",True,False,NZ-CIT,c8cb5c2a-b62c-481f-b170-0124af43d8b4,divisions,division_area
1,54bea793-2dc6-47b0-a4c1-5b96f17e66a3,"[0, 0, 0, 0, 6, 0, 0, 0, 13, 0, 0, 0, 0, 3, 0,...","{'xmin': -176.89370727539062, 'xmax': -175.831...",NZ,2,"[{'property': '', 'dataset': 'OpenStreetMap', ...",county,land,"{'primary': 'Chatham Islands Territory', 'comm...",True,False,NZ-CIT,7dace4fd-6acc-440f-af3f-9286b4e92adc,divisions,division_area
2,8169d711-197f-4dc6-af2e-983e698d0a65,"[0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 4, 136, 192,...","{'xmin': -177.24468994140625, 'xmax': -175.541...",NZ,2,"[{'property': '', 'dataset': 'OpenStreetMap', ...",region,maritime,"{'primary': 'Chatham Islands', 'common': {'bs'...",False,True,NZ-CIT,c8cb5c2a-b62c-481f-b170-0124af43d8b4,divisions,division_area
3,eacddca6-4f38-4257-bba2-46cdcf742f64,"[0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 4, 136, 192,...","{'xmin': -177.24468994140625, 'xmax': -175.541...",NZ,2,"[{'property': '', 'dataset': 'OpenStreetMap', ...",county,maritime,"{'primary': 'Chatham Islands Territory', 'comm...",False,True,NZ-CIT,7dace4fd-6acc-440f-af3f-9286b4e92adc,divisions,division_area
4,b40981d8-1a8b-4b30-bbdc-2a2d941bfa4f,"[0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 1, 100, 192,...","{'xmin': -152.82308959960938, 'xmax': -152.804...",PF,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",locality,land,"{'primary': 'Anapoto', 'common': {'en': 'Anapo...",True,True,,73f9829e-e8b0-4b5f-b029-d51dc8f9bc61,divisions,division_area



=== type=division_boundary ===
Sample file: part-00000-e8569eea-b518-4e81-8f1f-5942cad06009-c000.zstd.parquet


Unnamed: 0,column_name,column_type,null,key,default,extra
0,id,VARCHAR,YES,,,
1,geometry,BLOB,YES,,,
2,bbox,"STRUCT(xmin FLOAT, xmax FLOAT, ymin FLOAT, yma...",YES,,,
3,country,VARCHAR,YES,,,
4,version,INTEGER,YES,,,
5,sources,"STRUCT(property VARCHAR, dataset VARCHAR, reco...",YES,,,
6,subtype,VARCHAR,YES,,,
7,class,VARCHAR,YES,,,
8,is_land,BOOLEAN,YES,,,
9,is_territorial,BOOLEAN,YES,,,


Unnamed: 0,id,geometry,bbox,country,version,sources,subtype,class,is_land,is_territorial,division_ids,region,is_disputed,perspectives,theme,type
0,2b9d4df6-9ae9-3a08-be24-91413606f9b6,"[0, 0, 0, 0, 2, 0, 0, 0, 41, 192, 101, 234, 16...","{'xmin': -175.3327178955078, 'xmax': -175.3082...",TO,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",county,land,True,False,"[f55b8381-1e02-4afa-b4bd-fbcad4cb1693, 596e50a...",TO-04,False,,divisions,division_boundary
1,4616879a-64dd-3160-a8f9-b6e8cf67ce04,"[0, 0, 0, 0, 2, 0, 0, 0, 128, 192, 101, 232, 1...","{'xmin': -175.29612731933594, 'xmax': -175.251...",TO,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",county,land,True,False,"[b52b8829-62f1-47f7-9af4-9244172d8c57, 596e50a...",TO-04,False,,divisions,division_boundary
2,be4b5476-d9b1-3f4b-a8c1-6fde2a4fee30,"[0, 0, 0, 0, 2, 0, 0, 0, 17, 192, 101, 232, 13...","{'xmin': -175.27069091796875, 'xmax': -175.265...",TO,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",county,land,True,False,"[596e50a2-b4d4-43f7-b21c-59c9e5c59141, 8730f0c...",TO-04,False,,divisions,division_boundary
3,f360f2cb-6807-3726-844c-329d0c284088,"[0, 0, 0, 0, 2, 0, 0, 0, 54, 192, 101, 229, 17...","{'xmin': -175.17724609375, 'xmax': -175.148712...",TO,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",county,land,True,False,"[b52b8829-62f1-47f7-9af4-9244172d8c57, d4e6572...",TO-04,False,,divisions,division_boundary
4,06330c3e-d863-38e1-9704-b9e2a048bb2a,"[0, 0, 0, 0, 2, 0, 0, 0, 16, 192, 101, 227, 26...","{'xmin': -175.12155151367188, 'xmax': -175.096...",TO,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",county,land,True,False,"[d4e6572e-52a7-4b26-911a-b4b5008afda7, 2c4d7cc...",TO-04,False,,divisions,division_boundary


In [3]:
division_path = base_path / 'type=division'
country_pattern = str(division_path / '*.parquet')
results_path = (repo_root / 'data' / 'results')
results_path.mkdir(parents=True, exist_ok=True)
countries_output = results_path / 'countries.parquet'

con = duckdb.connect(database=':memory:')
country_df = con.execute(
    "SELECT * FROM read_parquet(?) WHERE subtype = 'country' ORDER BY id",
    [country_pattern]
).fetchdf()
print(f'Retrieved {len(country_df)} division records with subtype=country.')
country_df.to_parquet(countries_output, index=False)
print(f'Saved results to {countries_output}')
display(country_df.head())
con.close()


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Retrieved 219 division records with subtype=country.
Saved results to /workspaces/micromamba_cuda/data/results/countries.parquet


Unnamed: 0,id,geometry,bbox,country,version,sources,cartography,subtype,class,names,...,perspectives,local_type,hierarchies,parent_division_id,norms,population,capital_division_ids,capital_of_divisions,theme,type
0,006a49e8-ea13-49f7-af64-8ba7d7851649,"[0, 0, 0, 0, 1, 64, 34, 204, 222, 227, 79, 198...","{'xmin': 9.400137901306152, 'xmax': 9.40013885...",TN,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",,country,,"{'primary': 'تونس', 'common': {'hy': 'Թունիս',...",...,,{'en': 'country'},[[{'division_id': '006a49e8-ea13-49f7-af64-8ba...,,{'driving_side': 'right'},,[af3a25f5-8c3e-40a4-9b9b-abfb37cac09e],,divisions,division
1,04b8a6ab-a5a1-45fe-a569-1d296454f583,"[0, 0, 0, 0, 1, 192, 46, 250, 231, 92, 155, 11...","{'xmin': -15.490046501159668, 'xmax': -15.4900...",GM,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",,country,,"{'primary': 'Gambia', 'common': {'hy': 'Գամբիա...",...,,{'en': 'country'},[[{'division_id': '04b8a6ab-a5a1-45fe-a569-1d2...,,{'driving_side': 'right'},,[8d8f5445-8488-4f94-a608-cfb40317c0db],,divisions,division
2,051da74f-6039-42fb-943f-3774707222d8,"[0, 0, 0, 0, 1, 64, 64, 27, 187, 113, 90, 182,...","{'xmin': 32.21665573120117, 'xmax': 32.2166595...",UG,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",,country,,"{'primary': 'Uganda', 'common': {'hy': 'Ուգանդ...",...,,{'en': 'country'},[[{'division_id': '051da74f-6039-42fb-943f-377...,,{'driving_side': 'left'},,[ae7f1dd6-21fc-4643-a5f6-38780b2b7ca2],,divisions,division
3,05661c9d-68f5-4a26-a653-05f6ef959b50,"[0, 0, 0, 0, 1, 64, 84, 255, 255, 214, 121, 24...","{'xmin': 83.9999771118164, 'xmax': 83.99999237...",NP,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",,country,,"{'primary': 'नेपाल', 'common': {'hy': 'Նեպալ',...",...,,{'en': 'country'},[[{'division_id': '05661c9d-68f5-4a26-a653-05f...,,{'driving_side': 'left'},,[80848502-b347-463c-a683-aeccdd7696c7],,divisions,division
4,08dcf896-627e-46e8-9439-3d905390b7c3,"[0, 0, 0, 0, 1, 64, 22, 137, 139, 227, 206, 11...","{'xmin': 5.634322643280029, 'xmax': 5.63432359...",NL,1,"[{'property': '', 'dataset': 'OpenStreetMap', ...",,country,,"{'primary': 'Nederland', 'common': {'hy': 'Նիդ...",...,,{'en': 'country'},[[{'division_id': '08dcf896-627e-46e8-9439-3d9...,,{'driving_side': 'right'},,[29be7bc9-5783-4240-87d6-fefa0a64e0b3],,divisions,division


In [None]:
import geopandas as gpd
import folium
from folium.features import GeoJsonTooltip
from IPython.display import display

countries_gdf = gpd.read_parquet(countries_output)
if countries_gdf.crs is None or countries_gdf.crs.to_epsg() != 4326:
    countries_gdf = countries_gdf.to_crs('EPSG:4326')

m = folium.Map(location=[0, 0], zoom_start=2)
folium.GeoJson(
    countries_gdf,
    tooltip=GeoJsonTooltip(fields=['country'], aliases=['Country:']),
).add_to(m)
display(m)


ValueError: Missing geo metadata in Parquet/Feather file.
            Use pandas.read_parquet/read_feather() instead.