In [1]:
import os
from pathlib import Path

import geopandas as gpd
import numpy as np
import osmnx as ox
import pandas as pd

In [2]:
# Root folders for every dataset are taken from environment variables, so the
# notebook holds no machine-specific paths. A missing variable raises KeyError.
data_path, population_grids_path, segregation_path = (
    Path(os.environ[variable])
    for variable in ("DATA_PATH", "POPULATION_GRIDS_PATH", "SEGREGATION_PATH")
)

In [3]:
# AGEB layer for zone 08.2.03 (2020), indexed by its CVEGEO key.
agebs_gpkg = population_grids_path.joinpath(
    "final", "zone_agebs", "shaped", "2020", "08.2.03.gpkg"
)
df_agebs = gpd.read_file(agebs_gpkg)
# POBTOT is removed here because the census table joined later brings its own
# POBTOT column, and DataFrame.join refuses overlapping column names.
df_agebs = df_agebs.drop(columns=["POBTOT"]).set_index("CVEGEO")

In [4]:
def _build_cvegeo(frame):
    """Concatenate the zero-padded ENTIDAD, MUN, LOC and AGEB codes into one key."""
    key = None
    for column, width in (("ENTIDAD", 2), ("MUN", 3), ("LOC", 4), ("AGEB", 4)):
        piece = frame[column].astype(str).str.zfill(width)
        key = piece if key is None else key + piece
    return key


census_csv = population_grids_path.joinpath(
    "initial",
    "census",
    "INEGI",
    "2020",
    "conjunto_de_datos_ageb_urbana_08_cpv2020.csv",
)
# Columns needed only to filter rows and build the key; dropped afterwards.
census_key_columns = ["ENTIDAD", "MUN", "LOC", "AGEB", "NOM_LOC"]
# Indicator columns kept in the final table.
census_value_columns = [
    "POBTOT",
    "P_0A2",
    "P_3A5",
    "P_60YMAS",
    "P18YM_PB",
    "P_18YMAS",
    "GRAPROES",
]

df_census = pd.read_csv(census_csv, usecols=census_key_columns + census_value_columns)
df_census = (
    # Keep only the per-AGEB total rows.
    df_census.loc[lambda frame: frame["NOM_LOC"] == "Total AGEB urbana"]
    .assign(CVEGEO=_build_cvegeo)
    .drop(columns=census_key_columns)
    .set_index("CVEGEO")
    # "*" cells become NaN so the float cast succeeds
    # (presumably the census marker for suppressed values -- TODO confirm).
    .replace("*", np.nan)
    .astype(float)
)

In [5]:
# Attach the census indicators to the AGEB geometries. Both tables are indexed
# by CVEGEO, and the inner join keeps only the keys present in both of them.
df = df_agebs.join(df_census, how="inner")

# Gráfica

In [None]:
# Bounding box of the study area, read after reprojecting to EPSG:4326 and
# converted to plain Python floats before being handed to OSMnx.
bounds_wgs84 = df.to_crs("EPSG:4326").total_bounds
bounds = tuple(map(float, bounds_wgs84))

# Street network inside that box, plus a copy projected to EPSG:6372 that is
# used for the nearest-node lookups.
# NOTE(review): no network_type is passed, so OSMnx's default applies --
# confirm that is the intended network for travel-time routing.
g = ox.graph_from_bbox(bounds)
g_proj = ox.project_graph(g, to_crs="EPSG:6372")

In [21]:
# Snap every AGEB centroid to its nearest street-network node.
# nearest_nodes compares the raw x/y numbers with the node coordinates of
# g_proj, so the centroids must be expressed in the CRS g_proj was projected
# to (EPSG:6372). The reprojection is explicit here, as it is for the
# hospitals, instead of relying on whatever CRS the AGEB file was stored in.
coords = df.to_crs("EPSG:6372").centroid.get_coordinates()
df["node_id"] = ox.nearest_nodes(
    g_proj, coords["x"], coords["y"], return_dist=False
)

In [6]:
# Collects one indicator per key; each following section adds its own entry.
rows = dict()

# Porcentaje de población de 0 a 5 años

In [7]:
# Share of the total population in the 0-2 and 3-5 columns combined.
# The value is a fraction (not multiplied by 100); NaN in either age column
# propagates to the result.
rows["porcentaje_pob_0a5"] = df["P_0A2"].add(df["P_3A5"]).div(df["POBTOT"])

# Porcentaje de población de 60 años y más

In [8]:
# Share of the total population in the P_60YMAS column, stored as a fraction
# (not multiplied by 100).
rows["porcentaje_pob_60"] = df["P_60YMAS"].div(df["POBTOT"])

# Porcentaje de población de 18 años y más con menos de preparatoria terminada

In [9]:
# Complement of P18YM_PB / P_18YMAS, i.e. the fraction of the 18+ population
# that is NOT counted in P18YM_PB. rsub(1) evaluates 1 - ratio.
rows["porcentaje_menos_prepa_terminada"] = (
    df["P18YM_PB"].div(df["P_18YMAS"]).rsub(1)
)

# Ingreso

In [10]:
# Per-capita income from the segregation dataset, indexed by CVEGEO.
# The source file spells the key in lowercase, so it is renamed before indexing.
income_gpkg = segregation_path / "incomes" / "M08.04.gpkg"
income_by_ageb = gpd.read_file(income_gpkg).rename(columns={"cvegeo": "CVEGEO"})
rows["ingreso"] = income_by_ageb.set_index("CVEGEO")["income_pc"]

# Accesibilidad

## Hospitales

In [12]:
# Medical-unit locations: only the geometry column is kept, reprojected to
# EPSG:6372 (the CRS g_proj was projected to).
hospitals_source = data_path / "datos" / "Unidad_Medica_Wgs84"
df_hospitals = gpd.read_file(hospitals_source).reset_index(drop=True)
df_hospitals = df_hospitals.filter(["geometry"]).to_crs("EPSG:6372")

# x/y table of the hospital geometries.
# NOTE(review): assumes one point per feature so rows align 1:1 with
# df_hospitals -- confirm the layer has no multi-part geometries.
coords_hospitals = df_hospitals.get_coordinates()

In [15]:
# Snap each hospital to its nearest node of the projected street network.
df_hospitals["node_id"] = ox.nearest_nodes(
    g_proj,
    X=coords_hospitals["x"],
    Y=coords_hospitals["y"],
    return_dist=False,
)

In [35]:
# Annotate the edges of g with speeds and then travel times; travel_time is
# the weight used for routing below.
# NOTE(review): routing runs on g while node ids were looked up on g_proj --
# this relies on both graphs sharing node ids; confirm.
g_speed = ox.add_edge_travel_times(ox.add_edge_speeds(g))

In [53]:
# Fastest routes (by travel_time) from the node of the FIRST AGEB in df to
# every hospital node. The origin is repeated once per hospital so origins and
# destinations pair up one-to-one.
first_ageb_node = df["node_id"].to_numpy()[0]
hospital_nodes = df_hospitals["node_id"].to_numpy()

paths = ox.shortest_path(
    g_speed,
    [first_ageb_node for _ in hospital_nodes],
    hospital_nodes,
    weight="travel_time",
    cpus=14,
)

In [None]:
def route_travel_time(route):
    """Return the summed edge travel_time along one route of g_speed.

    Parameters
    ----------
    route
        Sequence of node ids as produced by ox.shortest_path, or None.

    Returns
    -------
    float
        Sum of the ``travel_time`` attribute over the route's edges.
        NaN when the route is None (no path was found) and 0.0 when the
        route has fewer than two nodes (origin and destination coincide),
        because such a route has no edges to build a GeoDataFrame from.
    """
    if route is None:
        return np.nan
    if len(route) < 2:
        return 0.0
    # weight="travel_time" makes the choice among parallel edges follow the
    # same criterion the routing itself minimised.
    edges = ox.routing.route_to_gdf(g_speed, route, weight="travel_time")
    return float(edges["travel_time"].sum())


# One travel time per hospital, aligned with df_hospitals (paths was built
# with one destination per hospital row).
travel_times = pd.Series(
    [route_travel_time(path) for path in paths],
    index=df_hospitals.index,
    name="travel_time",
)

# Travel time of the first route, the value the original cell inspected.
travel_times.iloc[0]

np.float64(236.08939387104772)