In [90]:
import os
from pathlib import Path

import geopandas as gpd

In [91]:
# Dataset root directories, each read from an environment variable and wrapped
# in a Path. A missing variable raises KeyError, so the notebook stops here
# instead of failing later with an unclear path error.
(
    data_path,
    population_grids_path,
    segregation_path,
    geostatistical_framework_path,
) = map(
    Path,
    (
        os.environ["DATA_PATH"],
        os.environ["POPULATION_GRIDS_PATH"],
        os.environ["SEGREGATION_PATH"],
        os.environ["GEOSTATISTICAL_FRAMEWORK_PATH"],
    ),
)

In [92]:
# Read the AGEB layer stored under the population-grids directory, then drop
# the POBTOT column and index the frame by CVEGEO so that its index can be
# used for membership checks.
# NOTE(review): unlike the other layers in this notebook, this frame is not
# reprojected to EPSG:6372 — confirm the file is already in the expected CRS.
df_agebs = gpd.read_file(
    population_grids_path.joinpath(
        "final", "zone_agebs", "shaped", "2020", "08.2.03.gpkg"
    )
)
df_agebs = df_agebs.drop(columns=["POBTOT"]).set_index("CVEGEO")

# Read the block shapefile and keep only the rows whose CVEGEO starts with
# (first 13 characters) a key present in df_agebs' index. The result is
# indexed by CVEGEO and reprojected to EPSG:6372.
df_blocks = (
    gpd.read_file(
        geostatistical_framework_path.joinpath("2020", "08_chihuahua", "08m.shp")
    )
    .loc[lambda blocks: blocks["CVEGEO"].str[:13].isin(df_agebs.index)]
    .set_index("CVEGEO")
    .to_crs("EPSG:6372")
)

In [95]:
# Neighborhood polygons: keep only NOMBRE / ID / geometry, drop rows that
# repeat an earlier geometry, reproject to EPSG:6372, then discard the
# placeholder names and two hand-picked IDs.
# NOTE(review): de-duplication runs before the name filter, so when a
# placeholder row ('SIN COLONIA' / 'SIN NOMBRE') precedes a named row with the
# same geometry, both end up removed — confirm this is intended.
# NOTE(review): the reason IDs 3500 and 2955 are excluded is not recorded here.
df_neighborhoods = (
    gpd.read_file("./neighborhoods.gpkg")
    .filter(items=["NOMBRE", "ID", "geometry"])
    .drop_duplicates(subset=["geometry"])
    .to_crs("EPSG:6372")
    .loc[lambda hoods: ~hoods["NOMBRE"].isin(["SIN COLONIA", "SIN NOMBRE"])]
    .loc[lambda hoods: ~hoods["ID"].isin([3500, 2955])]
)

In [96]:
# Pair every neighborhood with each neighborhood it lies within. The left
# (inner) label is moved out of the index into an "index_left" column, and the
# trivial pairs where a polygon matches itself are removed.
self_intersections = (
    gpd.sjoin(df_neighborhoods, df_neighborhoods, how="inner", predicate="within")
    .rename_axis("index_left")
    .reset_index()
    .loc[lambda pairs: pairs["index_left"] != pairs["index_right"]]
)

# Neighborhoods that never appear as the inner member of a containment pair,
# i.e. those not located within any other neighborhood.
whole = df_neighborhoods.loc[
    ~df_neighborhoods.index.isin(self_intersections["index_left"])
]

# Write both intermediate layers to the working directory for inspection.
self_intersections.to_file("./intersections.gpkg")
whole.to_file("./whole.gpkg")

In [97]:
# Work on a copy so df_neighborhoods stays untouched, then cut every contained
# (inner) polygon out of the polygon that contains it (outer). Geometries are
# read from df_fixed itself, so an outer polygon that has several inner
# polygons accumulates one cut per pair.
df_fixed = df_neighborhoods.copy()
for inner_label, outer_label in zip(
    self_intersections["index_left"], self_intersections["index_right"]
):
    inner = df_fixed.loc[inner_label, "geometry"]
    outer = df_fixed.loc[outer_label, "geometry"]
    df_fixed.loc[outer_label, "geometry"] = outer.difference(inner)

In [89]:
# Write the neighborhoods with contained polygons cut out to ./fixed.gpkg
# (relative to the current working directory).
# NOTE(review): this cell's execution count (89) is lower than that of the
# cell that builds df_fixed (97), so the file on disk may predate the current
# df_fixed — re-run after a Restart & Run All.
df_fixed.to_file("./fixed.gpkg")