In [None]:
import pathlib

import geopandas as gpd
import pandas as pd

In [None]:
# Data directory: the "data" folder next to the directory the kernel was started in.
DATA_ROOT_PATH = pathlib.Path.cwd().parent.joinpath("data")

In [None]:
# The GeoPackage is not part of the repository; data/README.md explains how to download it.
german_vg_df = gpd.read_file(
    DATA_ROOT_PATH.joinpath("external", "DE_VG5000.gpkg"),
    layer="vg5000_vwg",
)

In [None]:
# Narrow the layer to the OBJID, GEN and geometry columns.
# .loc is used on purpose: it hands back an independent frame that later cells can extend.
german_vg_stripped_df = german_vg_df.loc[
    :,
    ["OBJID", "GEN", "geometry"],
]
german_vg_stripped_df

In [None]:
# Add each community's centroid as an extra geometry column named "center".
# .assign builds a new frame, so german_vg_stripped_df keeps exactly the columns it
# was displayed with above and this cell can be re-run without side effects.
german_vg_calculated = german_vg_stripped_df.assign(
    center=german_vg_stripped_df.geometry.centroid,
)
german_vg_calculated

In [None]:
# Load the semicolon-separated store table and turn it into point geometries.
globus_df = pd.read_csv(DATA_ROOT_PATH / "processed" / "globus_info.csv", sep=";")
globus_positions = (
    gpd.GeoDataFrame(
        globus_df,
        # x = "lon", y = "lat", interpreted as EPSG:4326 coordinates
        geometry=gpd.GeoSeries.from_xy(globus_df["lon"], globus_df["lat"]),
        crs=4326,
    )
    # the raw coordinate columns are redundant once the point geometry exists
    .drop(columns=["lat", "lon"])
    # NOTE(review): EPSG:25832 is presumably the CRS of the VG5000 layer — confirm
    .to_crs(epsg=25832)
)
globus_positions

In [None]:
# Match every community centroid with its nearest Globus position.
# The separation is written to the "distance" column, in units of the layers' CRS.
community_next_globus_info_full = german_vg_calculated.set_geometry("center").sjoin_nearest(
    globus_positions,
    lsuffix="left",
    rsuffix="right",
    distance_col="distance",
    exclusive=True,
)
community_next_globus_info_full

In [None]:
# "index_right" holds the index label of the matched row in globus_positions and is
# exported as an int8 column. int8 only covers -128..127 and astype() wraps silently
# on overflow, so fail loudly instead of writing corrupted row references.
if not community_next_globus_info_full["index_right"].between(-128, 127).all():
    raise ValueError(
        "index_right contains values outside the int8 range (-128..127); "
        "widen the 'globus_row' dtype before exporting."
    )

# Keep the community name, distance, community geometry and matched row reference;
# make "geometry" the active geometry again (the join ran on "center") and
# reproject it to EPSG:4326 before renaming, downcasting and exporting.
community_next_globus_info = (
    community_next_globus_info_full[["GEN", "distance", "geometry", "index_right"]]
    .set_geometry("geometry")
    .to_crs(crs=4326)
    .rename(columns={"GEN": "community", "index_right": "globus_row"})
    .astype({"community": "string", "distance": "float32", "globus_row": "int8"})
)
community_next_globus_info.to_parquet(DATA_ROOT_PATH / "processed" / "german_communities.parquet")