In [2]:
import os
from pathlib import Path

import geopandas as gpd

In [3]:
# Root directories of the input datasets, each taken from a required
# environment variable; a missing variable raises KeyError in this cell.
(
    data_path,
    population_grids_path,
    segregation_path,
    geostatistical_framework_path,
) = (
    Path(os.environ[variable])
    for variable in (
        "DATA_PATH",
        "POPULATION_GRIDS_PATH",
        "SEGREGATION_PATH",
        "GEOSTATISTICAL_FRAMEWORK_PATH",
    )
)

In [4]:
# Read final/zone_agebs/shaped/2020/08.2.03.gpkg from the population-grids
# directory, discard its POBTOT column and index the rows by CVEGEO.
df_agebs = (
    gpd.read_file(
        population_grids_path.joinpath(
            "final", "zone_agebs", "shaped", "2020", "08.2.03.gpkg"
        )
    )
    .drop(columns="POBTOT")
    .set_index("CVEGEO")
)

# Read 2020/08_chihuahua/08m.shp and keep only the rows whose CVEGEO, cut to
# its first 13 characters, matches a key in df_agebs' index.  The survivors
# are indexed by their full CVEGEO and reprojected to EPSG:6372.
df_blocks = (
    gpd.read_file(
        geostatistical_framework_path.joinpath("2020", "08_chihuahua", "08m.shp")
    )
    .loc[lambda blocks: blocks["CVEGEO"].str[:13].isin(df_agebs.index)]
    .set_index("CVEGEO")
    .to_crs("EPSG:6372")
)

In [5]:
# wanted_ids = [2014, 3232, 3374]

# Neighbourhood polygons from datos/colonias_raw.geojson: drop the ENTIDAD,
# MUNICIPIO and CONTROL columns, remove rows that repeat a geometry, reproject
# to EPSG:6372, and store each polygon's area (in that CRS) as orig_area so it
# is available after the polygons are intersected or edited later on.
df_neighborhoods = (
    gpd.read_file(data_path.joinpath("datos", "colonias_raw.geojson"))
    .drop(columns=["ENTIDAD", "MUNICIPIO", "CONTROL"])
    .drop_duplicates(subset="geometry")
    .to_crs("EPSG:6372")
    .assign(orig_area=lambda frame: frame["geometry"].area)
)

# Fraction of each neighbourhood's original area that intersects df_agebs.
#
# df_agebs is used in whatever CRS its file was stored in, while
# df_neighborhoods is in EPSG:6372.  GeoDataFrame.overlay only *warns* on a CRS
# mismatch and then intersects raw coordinates, which would make every area
# below meaningless.  Reprojecting to the neighbourhoods' CRS right here makes
# the intersection safe and is a no-op when both layers already agree.
# NOTE(review): to_crs requires df_agebs to carry a CRS; confirm the source
# GeoPackage always defines one.
#
# Neighbourhoods that do not intersect any df_agebs polygon are absent from the
# result (overlay defaults to how="intersection"), not present with a 0.
overlay = (
    df_neighborhoods.overlay(df_agebs.reset_index().to_crs(df_neighborhoods.crs))
    .assign(area=lambda df: df["geometry"].area)
    # One row per intersection piece: sum the pieces per neighbourhood ID.
    # Taking the first orig_area assumes all rows sharing an ID come from the
    # same neighbourhood polygon -- TODO confirm ID is unique in the source.
    .groupby("ID")
    .agg({"area": "sum", "orig_area": "first"})
    .assign(area_frac=lambda df: df["area"] / df["orig_area"])
    # A zero-area neighbourhood gives 0 / 0 = NaN; count it as not covered.
    .fillna(0)
)

# IDs whose covered area fraction is at least 0.10; df_neighborhoods is then
# narrowed to the rows carrying one of those IDs.
urban_neighborhoods = overlay.loc[overlay["area_frac"].ge(0.10)].index

df_neighborhoods = df_neighborhoods.loc[
    df_neighborhoods["ID"].isin(urban_neighborhoods)
]

In [6]:
# Self-join of the neighbourhoods on the "within" predicate: each row pairs a
# neighbourhood (index_left) with one that contains it (index_right).  Every
# polygon is within itself, so pairs with equal labels are filtered out.
self_intersections = (
    df_neighborhoods.sjoin(df_neighborhoods, how="inner", predicate="within")
    .reset_index(names="index_left")
    .loc[lambda pairs: pairs["index_left"] != pairs["index_right"]]
)

# Neighbourhoods that are not contained in any other neighbourhood.
whole = df_neighborhoods.loc[
    ~df_neighborhoods.index.isin(self_intersections["index_left"])
]

In [7]:
# Remove every contained neighbourhood from the polygon that contains it, so
# the two no longer overlap.  Edits go to a copy; df_neighborhoods is not
# modified.  Geometries are read back from df_fixed on each pass, so a
# container already cut in an earlier iteration is used in its current state.
df_fixed = df_neighborhoods.copy()
for inner_label, outer_label in zip(
    self_intersections["index_left"], self_intersections["index_right"]
):
    inner = df_fixed.loc[inner_label, "geometry"]
    outer = df_fixed.loc[outer_label, "geometry"]
    df_fixed.loc[outer_label, "geometry"] = outer.difference(inner)

In [8]:
# Write the corrected neighbourhoods to ./fixed.gpkg (relative to the current
# working directory), leaving out the helper orig_area column.
df_fixed.drop("orig_area", axis="columns").to_file("./fixed.gpkg")