In [None]:
import osmnx as ox
from pathlib import Path
import os
import geopandas as gpd

from mclights.utils.get_data import get_data_osmx_or_local

# Data folders live next to the notebook folder: both are resolved from the
# parent of the current working directory, so the kernel must be started
# from inside the notebook's own directory.
project_dir = Path.cwd().parent
raw_data_dir = project_dir / "raw_data"
temp_data_dir = project_dir / "temp_data"

In [None]:
# Fetch every OSM feature tagged place=city/town/village.
# NOTE(review): presumably the helper reads the .gpkg when it exists and
# queries OSM otherwise - confirm in mclights.utils.get_data.
cities_raw_path = raw_data_dir / "cities_raw.gpkg"
place_tags = {"place": ["city", "town", "village"]}
cities = get_data_osmx_or_local(cities_raw_path, place_tags)

In [None]:
# Keep the rows stored under the "node" index label and only the columns used
# below, then discard the original index.
# NOTE(review): this cell overwrites `cities`, so it cannot be re-run without
# first re-running the load cell (the "node" label is gone after the reset).
cities = cities.loc["node", ["geometry", "name", "place", "population", "name:pl", "wikipedia"]].reset_index(drop=True)

# Fill missing names from progressively weaker sources:
#   1. the Polish-language name tag,
#   2. the title of the Polish Wikipedia article, without any trailing
#      "(...)" disambiguation suffix.
# The pattern accepts titles with or without a parenthetical; requiring the
# "(" would discard plain titles such as "pl:Warszawa".
wikipedia_title = cities["wikipedia"].str.extract(r"^pl:(.+?)\s*(?:\(|$)", expand=False)
cities["name"] = (
    cities["name"]
        .fillna(cities["name:pl"])
        .fillna(wikipedia_title)
)

# Rows that still have no name are unusable downstream; the helper columns
# have served their purpose.
cities = cities.dropna(subset=["name"])
cities = cities.drop(columns=["name:pl", "wikipedia"])

# Export is disabled. NOTE(review): `data_dir` is not defined in this notebook;
# pick the intended directory before re-enabling.
# cities.to_csv(data_dir / "cities.csv", index=False)

In [None]:
# Administrative boundaries at OSM admin levels 2, 4 and 6 (used further down
# as country, voivodeships and counties respectively).
tags = {
    "boundary": "administrative",
    "admin_level": ["2", "4", "6"],
}
division_raw_path = raw_data_dir / "division_raw.gpkg"
division = get_data_osmx_or_local(division_raw_path, tags)

In [None]:
# Keep only areal features: a point can only be "within" a polygon.
division = division[division.geometry.geom_type.isin(["Polygon", "MultiPolygon"])]
# Work in EPSG:2180 (projected CRS for Poland) from here on.
division = division.to_crs(epsg=2180)

# One frame per administrative level.
poland       = division.loc[division["admin_level"] == "2", ["name", "geometry"]]
voivodeships = division.loc[division["admin_level"] == "4", ["name", "geometry"]]
counties     = division.loc[division["admin_level"] == "6", ["name", "geometry"]]

# sjoin compares raw coordinates, so the points must be in the same CRS as the
# polygons; without this the join matches nothing useful.
# assumes `cities` carries a CRS (OSM data is normally EPSG:4326) - TODO confirm
cities = cities.to_crs(division.crs)

# Attach the name of the enclosing voivodeship and county to every city.
# NOTE(review): with how="left", a city lying inside several overlapping
# polygons of the same level would be duplicated - check if that can happen.
for region_column, regions in (("voivodeship", voivodeships), ("county", counties)):
    joined = gpd.sjoin(
        # Dropping a previous result keeps the cell re-runnable without
        # producing suffixed duplicate columns.
        cities.drop(columns=[region_column], errors="ignore"),
        # A fresh unnamed index makes the bookkeeping column predictably
        # "index_right", whatever index the boundary data was loaded with.
        regions.rename(columns={"name": region_column}).reset_index(drop=True),
        how="left",
        predicate="within",
    )
    # Remove the bookkeeping column right away: sjoin refuses input frames
    # that already contain "index_right", so it must not survive into the
    # next iteration.
    cities = joined.drop(columns=[c for c in joined.columns if c.startswith("index_")])

In [None]:
import matplotlib.pyplot as plt

# Quick visual sanity check of the city points.
fig, ax = plt.subplots()
# matplotlib's keyword is `facecolor` (no underscore); an unknown keyword such
# as `face_color` is rejected by the underlying scatter call. Hollow markers
# need the string "none" plus an explicit edge colour, otherwise the edge
# inherits the (transparent) face and nothing is drawn.
# NOTE(review): hollow markers are the assumed intent - adjust if not.
cities.plot(ax=ax, facecolor="none", edgecolor="tab:blue")
ax.set_title("Cities, towns and villages");