In [None]:
import osmnx as ox
from pathlib import Path
import os

# Project folders are resolved relative to the parent of the current working directory.
project_dir = Path.cwd().parent

temp_data_dir = project_dir / "temp_data"
data_dir = project_dir / "data"

# Download every OSM feature in Poland tagged place=city/town/village and
# reproject it to EPSG:2180 (the CRS used for all spatial operations in this notebook).
cities = ox.features_from_place("Poland", {"place": ["city", "town", "village"]})
cities = cities.to_crs(epsg=2180)

# Keep only the rows stored under "node" on the first index level and the columns used below.
cities = cities.loc["node", ["geometry", "name", "place", "population", "name:pl", "wikipedia"]].reset_index(drop=True)

# Name fallbacks, in order: `name`, `name:pl`, then the Polish Wikipedia article title.
# The title is the text after "pl:" up to an optional parenthesised suffix, so both
# "pl:Kraków" and "pl:Nowa Wieś (powiat ...)" produce a usable name.
wikipedia_name = cities["wikipedia"].str.extract(r"^pl:([^(]+?)\s*(?:\(|$)", expand=False)
cities["name"] = cities["name"].fillna(cities["name:pl"]).fillna(wikipedia_name)

# Places that still have no name are discarded; the helper columns are no longer needed.
cities = cities.dropna(subset=["name"])
cities = cities.drop(columns=["name:pl", "wikipedia"])

# cities.to_csv(data_dir / "cities.csv", index=False)

In [None]:
# Notes for future work: assign each place its województwo, powiat and gmina.

import osmnx as ox

# Download administrative boundaries for Poland.
# osmnx combines the entries of `tags` as a union (a feature only has to match one
# of them), so the result is narrowed to the intended intersection right below.
admin = ox.features_from_place(
    "Poland",
    tags={
        "boundary": "administrative",
        "admin_level": ["4", "6", "8"],
    },
)

# Keep only areal features that really are administrative boundaries.
is_area = admin.geometry.geom_type.isin(["Polygon", "MultiPolygon"])
is_administrative = admin["boundary"] == "administrative"
admin = admin[is_area & is_administrative]

# One frame per administrative level, reprojected to EPSG:2180 to match `cities`.
# NOTE(review): OSM tagging conventions for Poland may place gminas at
# admin_level=7 rather than 8 — confirm before relying on the `gmina` frame.
woj, powiat, gmina = (
    admin[admin["admin_level"] == level][["name", "geometry"]].to_crs(epsg=2180)
    for level in ("4", "6", "8")
)

import geopandas as gpd

# Attach the name of the enclosing województwo, powiat and gmina to every place.
region_layers = (
    ("wojewodztwo", woj),
    ("powiat", powiat),
    ("gmina", gmina),
)

# Remove leftovers from a previous run of this cell (join bookkeeping columns and
# already-attached region columns) so the cell can be re-executed safely.
region_columns = {column for column, _ in region_layers}
stale_columns = [
    c for c in cities.columns
    if c.startswith("index_") or c in region_columns
]
cities = cities.drop(columns=stale_columns)

for column, boundaries in region_layers:
    # A fresh, unnamed integer index on the right side makes sjoin emit exactly
    # one bookkeeping column ("index_right"), whatever index the boundaries carry.
    regions = (
        boundaries[["name", "geometry"]]
        .rename(columns={"name": column})
        .reset_index(drop=True)
    )

    cities = gpd.sjoin(cities, regions, how="left", predicate="within")

    # Drop the bookkeeping column right away: it must not leak into the next
    # join, where it would clash with that join's own "index_right".
    cities = cities.drop(columns="index_right")

    # A point lying inside several overlapping polygons is returned once per
    # polygon; keep the first match so each place stays a single row.
    # This assumes the index of `cities` is unique before the join.
    cities = cities[~cities.index.duplicated(keep="first")]

# Places that did not fall inside any gmina polygon get the nearest gmina,
# provided it lies within MAX_SNAP_DISTANCE_M of the point.
MAX_SNAP_DISTANCE_M = 500  # meters

missing = cities["gmina"].isna()

if missing.any():
    # Only the geometry is needed on the left; a positional index lets the result
    # be written back by position even if the index of `cities` is not unique.
    unmatched = cities[missing][["geometry"]].reset_index(drop=True)

    # Rename the right-hand column so it cannot collide with a column of the
    # left frame, and reset the index so only "index_right" is added.
    candidates = (
        gmina[["name", "geometry"]]
        .rename(columns={"name": "gmina_nearest"})
        .reset_index(drop=True)
    )

    nearest = gpd.sjoin_nearest(
        unmatched,
        candidates,
        how="left",
        max_distance=MAX_SNAP_DISTANCE_M,
    )

    # Equidistant candidates yield several rows per place: keep one per place and
    # restore the original order so the values line up with `missing`.
    nearest = nearest[~nearest.index.duplicated(keep="first")].sort_index()

    cities.loc[missing, "gmina"] = nearest["gmina_nearest"].to_numpy()
