In [1]:
import os
from pathlib import Path

import geopandas as gpd
import numpy as np
import osmnx as ox
import pandana as pdna
import pandas as pd

In [2]:
def _path_from_env(variable: str) -> Path:
    """Return the location stored in a required environment variable.

    Raises ``KeyError`` when the variable is not set, so a misconfigured
    environment fails here rather than at the first file read.
    """
    return Path(os.environ[variable])


# All input locations are injected through the environment; nothing is hard-coded.
data_path = _path_from_env("DATA_PATH")
population_grids_path = _path_from_env("POPULATION_GRIDS_PATH")
segregation_path = _path_from_env("SEGREGATION_PATH")
census_path = _path_from_env("CENSUS_PATH")
geostatistical_framework_path = _path_from_env("GEOSTATISTICAL_FRAMEWORK_PATH")

# DataFrames

## Census

In [3]:
# Only the columns needed downstream are read:
#   - ENTIDAD / MUN / LOC / AGEB / MZA are concatenated into CVEGEO keys later on,
#   - NOM_LOC is used to pick out the AGEB summary rows,
#   - the remaining columns are the census indicators.
census_columns = [
    "ENTIDAD",
    "MUN",
    "LOC",
    "AGEB",
    "MZA",
    "NOM_LOC",
    "POBTOT",
    "P_0A2",
    "P_3A5",
    "P_60YMAS",
    "P18YM_PB",
    "P_18YMAS",
    "GRAPROES",
]

# NOTE(review): "08" is presumably the state code (the block shapefile lives
# under "08_chihuahua") -- confirm.
df_census_base = pd.read_csv(
    census_path / "2020" / "08.csv",
    usecols=census_columns,
)

## AGEBs

In [4]:
# AGEB polygons for the study zone, keyed by CVEGEO. POBTOT is dropped from the
# geometry layer so it does not clash with the census POBTOT in the join below.
df_geom_agebs = (
    gpd.read_file(
        population_grids_path
        / "final"
        / "zone_agebs"
        / "shaped"
        / "2020"
        / "08.2.03.gpkg",
    )
    .drop(columns=["POBTOT"])
    .set_index("CVEGEO")
)

# Census indicators at AGEB level: keep only the AGEB summary rows and build the
# 13-character CVEGEO key (2 state + 3 municipality + 4 locality + 4 AGEB).
# MZA is not dropped here, so it stays in the frame as a numeric column.
df_census_agebs = (
    df_census_base.query("NOM_LOC == 'Total AGEB urbana'")
    .assign(
        CVEGEO=lambda df: (
            df["ENTIDAD"].astype(str).str.zfill(2)
            + df["MUN"].astype(str).str.zfill(3)
            + df["LOC"].astype(str).str.zfill(4)
            + df["AGEB"].astype(str).str.zfill(4)
        ),
    )
    .drop(columns=["ENTIDAD", "MUN", "LOC", "AGEB", "NOM_LOC"])
    .set_index("CVEGEO")
    # Treat both "*" and "N/D" as missing, matching the block-level cleaning in
    # this notebook; replacing "*" alone would make the float cast fail on any
    # "N/D" cell.
    .replace(["*", "N/D"], np.nan)
    .astype(float)
)

# Inner join: only AGEBs present in both the geometry layer and the census remain.
df = df_geom_agebs.join(df_census_agebs, how="inner")

## Blocks

In [37]:
# --- Block geometries -------------------------------------------------------
blocks_shp = geostatistical_framework_path / "2020" / "08_chihuahua" / "08m.shp"
blocks_raw = gpd.read_file(blocks_shp)

# The first 13 characters of a block CVEGEO are the key of its parent AGEB.
blocks_raw = blocks_raw.assign(CVEGEO_AGEB=blocks_raw["CVEGEO"].str[:13])

# Keep only blocks whose parent AGEB belongs to the study zone, then reproject.
in_study_zone = blocks_raw["CVEGEO_AGEB"].isin(df_geom_agebs.index)
df_geom_blocks = (
    blocks_raw.loc[in_study_zone]
    .set_index("CVEGEO")
    .to_crs("EPSG:6372")
)

# --- Block census indicators ------------------------------------------------
# 16-character block key: state (2) + municipality (3) + locality (4)
# + AGEB (4) + block (3), each part left-padded with zeros.
block_key = (
    df_census_base["ENTIDAD"].astype(str).str.zfill(2)
    + df_census_base["MUN"].astype(str).str.zfill(3)
    + df_census_base["LOC"].astype(str).str.zfill(4)
    + df_census_base["AGEB"].astype(str).str.zfill(4)
    + df_census_base["MZA"].astype(str).str.zfill(3)
)

df_census_blocks = (
    df_census_base.drop(
        columns=["ENTIDAD", "MUN", "LOC", "AGEB", "MZA", "NOM_LOC"],
    )
    .assign(CVEGEO=block_key)
    .set_index("CVEGEO")
    # "*" and "N/D" are treated as missing before casting to float.
    .replace(["*", "N/D"], np.nan)
    .astype(float)
)

# Inner join: rows of the census table without a matching block geometry
# (and vice versa) are discarded.
df_blocks = df_geom_blocks.join(df_census_blocks, how="inner")

# Gráfica

In [6]:
# Bounding box of the study AGEBs in geographic coordinates, as plain floats.
bounds = tuple(map(float, df.to_crs("EPSG:4326").total_bounds))

# Download the drivable street network for that box, reproject it to the CRS
# used by the block layer, and attach speed / travel-time edge attributes.
# NOTE(review): per the OSMnx docs `travel_time` is expressed in seconds --
# confirm for the installed version.
graph_latlon = ox.graph_from_bbox(bounds, network_type="drive")
graph_projected = ox.project_graph(graph_latlon, to_crs="EPSG:6372")
g = ox.add_edge_travel_times(ox.add_edge_speeds(graph_projected))

In [39]:
# Snap every block to the street network: take the block centroid and look up
# the closest graph node. Both layers are in EPSG:6372 at this point.
block_centroids = df_blocks.centroid
coords = block_centroids.get_coordinates()
df_blocks["node_id"] = ox.nearest_nodes(g, X=coords["x"], Y=coords["y"])

In [7]:
# Split the graph into node and edge tables. The edge table is flattened so
# that the endpoint identifiers (`u`, `v`) become ordinary columns.
df_nodes, edges_indexed = ox.graph_to_gdfs(g, nodes=True, edges=True)
df_edges = edges_indexed.reset_index()

In [8]:
# Build the pandana routing network from the OSMnx node/edge tables, using
# edge travel time as the impedance.
#
# The edge table comes from a *directed* drive graph: each row is one direction
# of travel. pandana's default (`twoway=True`) would additionally allow every
# edge to be traversed backwards, which silently turns one-way streets into
# two-way streets and underestimates driving times. `twoway=False` keeps the
# directions exactly as they are in the graph.
net = pdna.Network(
    df_nodes["x"],
    df_nodes["y"],
    df_edges["u"],
    df_edges["v"],
    df_edges[["travel_time"]],
    twoway=False,
)

# Precompute ranges up to 3600 impedance units (the same horizon the
# accessibility queries below use) to speed those queries up.
net.precompute(3600)

# Stats

In [9]:
# Accumulator for the output statistics: each key is an output column name and
# each value is a Series of that statistic (the sections below fill it in).
rows = {}

## Porcentaje población 0-5 años

In [10]:
# Share of the total population in the two youngest census groups
# (P_0A2 + P_3A5) -- a fraction in [0, 1], not a percentage.
pop_0a5 = df["P_0A2"].add(df["P_3A5"])
rows["porcentaje_pob_0a5"] = pop_0a5.div(df["POBTOT"])

## Porcentaje de población >60

In [11]:
# Share of the total population counted in P_60YMAS (a fraction, not a percentage).
rows["porcentaje_pob_60"] = df["P_60YMAS"].div(df["POBTOT"])

## Porcentaje de población con menos de preparatoria terminada

In [12]:
# Complement of the P18YM_PB share among the P_18YMAS population.
# NOTE(review): presumably P18YM_PB counts adults with post-basic schooling,
# so the complement is the share without it -- confirm against the census
# data dictionary.
share_p18ym_pb = df["P18YM_PB"].div(df["P_18YMAS"])
rows["porcentaje_menos_prepa_terminada"] = 1 - share_p18ym_pb

## Ingreso

In [13]:
# Income layer from the segregation data set; its key column is lower-case
# `cvegeo`, so the index is renamed to match the CVEGEO index used elsewhere.
income_layer = gpd.read_file(segregation_path / "incomes" / "M08.04.gpkg")
rows["ingreso"] = (
    income_layer.set_index("cvegeo")["income_pc"].rename_axis("CVEGEO")
)

In [14]:
# Turn the accumulated statistics into a single table: one column per entry of
# `rows` (named after its key), then attach the AGEB polygons by index.
columns = [series.rename(name) for name, series in rows.items()]
stats_with_geometry = pd.concat(columns, axis=1).join(df[["geometry"]])
out = gpd.GeoDataFrame(stats_with_geometry, crs=df.crs, geometry="geometry")

# Accesibilidad

In [62]:
def calculate_weighted_accessibility(
    net: pdna.Network,
    coords: pd.DataFrame,
    poi_type: str,
    df_blocks: gpd.GeoDataFrame,
    *,
    weight_col: str = "POBTOT",
    max_time: float = 3600,
) -> pd.Series:
    """Weighted mean network impedance to the nearest POI, aggregated per AGEB.

    Every block is represented by its network node (``node_id``). The impedance
    from that node to the closest point of interest is averaged over the blocks
    of each AGEB, weighting each block by ``weight_col``.

    Parameters
    ----------
    net
        Routing network. It is modified: the POIs are registered on it under
        the category ``poi_type``.
    coords
        POI coordinates with ``x`` and ``y`` columns, in the network's CRS.
    poi_type
        Category name for the POIs; also used to name the result
        (``"<poi_type>_travel_time"``).
    df_blocks
        Block table with the columns ``CVEGEO_AGEB``, ``node_id`` and
        ``weight_col``.
    weight_col
        Column holding the block weights.
    max_time
        Search horizon passed to the network, in the units of its impedance.
        The default keeps the previous hard-coded value of 3600.

    Returns
    -------
    pd.Series
        Weighted mean impedance per AGEB, indexed by ``CVEGEO`` and named
        ``"<poi_type>_travel_time"``. AGEBs without any usable block are NaN.

    Notes
    -----
    NOTE(review): according to the pandana documentation, nodes with no POI
    within ``max_time`` are reported at ``max_time`` rather than as missing, so
    the result is capped at that value -- confirm for the installed version.
    """
    result_name = f"{poi_type}_travel_time"

    # Register the POIs and fetch, for every network node, the impedance to
    # the single nearest one (returned in a column labelled 1).
    net.set_pois(poi_type, max_time, 1, coords["x"], coords["y"])
    nearest = (
        net.nearest_pois(max_time, poi_type)
        .rename(columns={1: "travel_time"})
        .reset_index(names="node_id")
    )

    # Only the columns needed for the aggregation take part in the merge; this
    # avoids copying geometries and name clashes with unrelated columns.
    blocks = pd.DataFrame(
        df_blocks[["CVEGEO_AGEB", "node_id", weight_col]],
    ).merge(nearest, on="node_id", how="left")

    # A block contributes only when both its weight and its travel time are
    # known. Using the same mask for numerator and denominator keeps blocks
    # with a missing travel time from inflating the denominator.
    usable = blocks[weight_col].notna() & blocks["travel_time"].notna()
    weights = blocks[weight_col].where(usable)

    totals = (
        pd.DataFrame(
            {
                "CVEGEO_AGEB": blocks["CVEGEO_AGEB"],
                "num": weights * blocks["travel_time"],
                "den": weights,
            },
        )
        .groupby("CVEGEO_AGEB")[["num", "den"]]
        .sum()
    )

    return (
        (totals["num"] / totals["den"])
        .rename(result_name)
        .rename_axis("CVEGEO")
    )

## Hospitales

In [None]:
# Health-facility layer: keep only the geometry and bring it into the same
# projected CRS as the street network.
hospitals_raw = gpd.read_file(data_path / "datos" / "Unidad_Medica_Wgs84")
df_hospitals = (
    hospitals_raw.reset_index(drop=True)
    .filter(["geometry"])
    .to_crs("EPSG:6372")
)

# The accessibility helper returns network impedance; dividing by 60 rescales
# it (seconds to minutes, if the edge travel times are in seconds).
hospital_access = calculate_weighted_accessibility(
    net,
    df_hospitals.get_coordinates(),
    "hospital",
    df_blocks,
)
rows["tiempo_viaje_hospitales"] = hospital_access / 60

## Preparatorias

In [75]:
# High-school layer: keep only the geometry and bring it into the same
# projected CRS as the street network.
highschools_raw = gpd.read_file(data_path / "datos" / "Preparatorias_Wgs84")
df_highschools = (
    highschools_raw.reset_index(drop=True)
    .filter(["geometry"])
    .to_crs("EPSG:6372")
)

# The accessibility helper returns network impedance; dividing by 60 rescales
# it (seconds to minutes, if the edge travel times are in seconds).
highschool_access = calculate_weighted_accessibility(
    net,
    df_highschools.get_coordinates(),
    "highschool",
    df_blocks,
)
rows["tiempo_viaje_preparatorias"] = highschool_access / 60

# Out

In [83]:
# Assemble the final table: building the frame with the statistics as rows and
# transposing it gives one row per AGEB key and one column per statistic.
final_stats = pd.DataFrame.from_dict(rows, orient="index").transpose()

# Attach the AGEB polygons by index and convert to geographic coordinates.
final_with_geometry = final_stats.join(df[["geometry"]])
out = gpd.GeoDataFrame(
    final_with_geometry,
    crs=df.crs,
    geometry="geometry",
).to_crs("EPSG:4326")

# Written to the current working directory.
out.to_file("./test.gpkg")