In [22]:
import os
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd

In [23]:
def _env_path(variable: str) -> Path:
    """Return the value of environment variable ``variable`` wrapped in a ``Path``.

    Raises ``KeyError`` when the variable is not set, so a missing
    configuration entry fails in this cell rather than further down.
    """
    return Path(os.environ[variable])


# Root directories of the input datasets, all supplied through the environment.
data_path = _env_path("DATA_PATH")
population_grids_path = _env_path("POPULATION_GRIDS_PATH")
geostatistical_framework_path = _env_path("GEOSTATISTICAL_FRAMEWORK_PATH")
census_path = _env_path("CENSUS_PATH")

In [24]:
# AGEB polygons of the study zone for 2020, indexed by their CVEGEO key.
# The POBTOT column shipped with the layer is discarded.
agebs_file = population_grids_path.joinpath(
    "final", "zone_agebs", "shaped", "2020", "08.2.03.gpkg"
)
df_agebs = gpd.read_file(agebs_file).drop(columns=["POBTOT"]).set_index("CVEGEO")

# Block polygons from the 2020 geostatistical framework, restricted to blocks
# whose 13-character CVEGEO prefix appears in the `df_agebs` index, then
# indexed by CVEGEO and reprojected to EPSG:6372.
df_blocks = (
    gpd.read_file(geostatistical_framework_path / "2020" / "08_chihuahua" / "08m.shp")
    .loc[lambda blocks: blocks["CVEGEO"].str[:13].isin(df_agebs.index)]
    .set_index("CVEGEO")
    .to_crs("EPSG:6372")
)

# Neighborhood polygons, read from a file next to the notebook and indexed by
# their ID column.
neighborhoods_file = "./fixed.gpkg"
df_neighborhoods = gpd.read_file(neighborhoods_file).set_index("ID")

In [25]:
# Zero-padded width of every key column, listed in the order in which the
# pieces are concatenated to form the 16-character block CVEGEO.
cvegeo_widths = {"ENTIDAD": 2, "MUN": 3, "LOC": 4, "AGEB": 4, "MZA": 3}


def _cvegeo(frame: pd.DataFrame) -> pd.Series:
    """Build the CVEGEO key of every row from its zero-padded key columns."""
    key = ""
    for column, width in cvegeo_widths.items():
        key = key + frame[column].astype(str).str.zfill(width)
    return key


# 2020 census table keyed by CVEGEO. Only POBTOT and P_60YMAS are kept, the
# placeholder strings "*" and "N/D" become NaN, and everything is cast to float.
df_census = (
    pd.read_csv(
        census_path / "2020" / "08.csv",
        usecols=[*cvegeo_widths, "POBTOT", "P_60YMAS"],
    )
    .assign(CVEGEO=_cvegeo)
    .set_index("CVEGEO")
    .drop(columns=list(cvegeo_widths))
    .replace(["*", "N/D"], np.nan)
    .astype(float)
)

In [26]:
# Minimum share of a block's own area that must fall inside one neighborhood
# for the block to be assigned to it.
MIN_OVERLAP_FRAC = 0.9

# Neighborhood polygons with their ID exposed as a regular column so that it
# is carried through the overlay.
neighborhoods = df_neighborhoods.reset_index(names="neighborhood_id")
# `overlay` only warns on a CRS mismatch and then intersects the raw
# coordinates, which would make the area fractions below meaningless. Bring the
# neighborhoods into the blocks' CRS when both are known and differ; nothing
# changes when they already agree.
# NOTE(review): a layer without any CRS is passed through untouched — confirm
# that "./fixed.gpkg" declares one.
if neighborhoods.crs is not None and neighborhoods.crs != df_blocks.crs:
    neighborhoods = neighborhoods.to_crs(df_blocks.crs)

# One row per block: the neighborhood covering the largest share of the block,
# kept only when that share reaches MIN_OVERLAP_FRAC.
joined = (
    df_blocks.assign(orig_area=lambda df: df["geometry"].area)
    .reset_index()
    .overlay(neighborhoods)
    .assign(inter_area_frac=lambda df: df["geometry"].area / df["orig_area"])
    .sort_values("inter_area_frac", ascending=False)
    # Keep the entire best-overlap row of each block. `GroupBy.first()` would
    # instead pick the first non-null value of every column independently and
    # could stitch together values coming from different intersections.
    .drop_duplicates(subset="CVEGEO", keep="first")
    .set_index("CVEGEO")
    .query("inter_area_frac >= @MIN_OVERLAP_FRAC")
)

# Attach the neighborhood ID to every block (aligned on CVEGEO). Blocks without
# a qualifying neighborhood get the sentinel ID 0.
df_blocks_extended = df_blocks.assign(neighborhood=joined["neighborhood_id"]).assign(
    neighborhood=lambda df: df["neighborhood"].fillna(0).astype(int),
)

In [27]:
# Census totals per neighborhood: attach the census figures to each block by
# CVEGEO and add them up within every neighborhood ID. Missing values are
# skipped by the sum, so they count as zero.
census_columns = ["POBTOT", "P_60YMAS"]
df_neighborhoods_extended = (
    df_blocks_extended.join(df_census[census_columns])
    .groupby("neighborhood")[census_columns]
    .sum()
)

In [28]:
# Inspect the per-neighborhood totals. Row 0 gathers the blocks that were not
# assigned to any neighborhood (their ID was filled with 0).
df_neighborhoods_extended

Unnamed: 0_level_0,POBTOT,P_60YMAS
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
0,88027.0,7042.0
1,302.0,54.0
2,333.0,82.0
3,804.0,178.0
4,494.0,123.0
...,...,...
3480,0.0,0.0
3486,16.0,0.0
3488,12.0,3.0
3491,21.0,4.0


In [52]:
# Indicators to export, keyed by output column name. Each value is a Series
# indexed by neighborhood ID.
rows = {
    # Population aged 60 and over divided by total population. The value is a
    # plain ratio (not multiplied by 100) and is NaN where both totals are 0.
    "porcentaje_pob_60": df_neighborhoods_extended["P_60YMAS"].div(
        df_neighborhoods_extended["POBTOT"]
    ),
}

In [66]:
# Assemble the export table: one row per neighborhood ID with every indicator
# in `rows`, plus the neighborhood's name and polygon (aligned on the ID).
out = (
    pd.DataFrame.from_dict(rows, orient="index")
    .transpose()
    .assign(
        name=df_neighborhoods["NOMBRE"],
        geometry=df_neighborhoods["geometry"],
    )
    # ID 0 is the bucket for blocks without a neighborhood and has no polygon.
    # `errors="ignore"` keeps this cell working when every block was assigned
    # and that bucket therefore does not exist.
    .drop(index=[0], errors="ignore")
)
# Promote to a GeoDataFrame in the neighborhoods' CRS and reproject to
# EPSG:4326 for export.
out = gpd.GeoDataFrame(out, crs=df_neighborhoods.crs).to_crs("EPSG:4326")

In [67]:
# Save the neighborhood indicators next to the notebook.
output_file = "./neighborhoods.geojson"
out.to_file(output_file)