# 07b_physical_variables_to_edges (Guadalajara only)

This notebook __transfers the following variables to the edges__ (which already contain slope, from notebook 07a):
* ndvi_mean
* entropy (land-use mix)
* banqueta_pct
* average_population
* average_density

## Import libraries

In [None]:
# Relative path prefix used to locate the project package and the data files
first_folder_path = "../../../"

In [None]:
import warnings
warnings.filterwarnings('ignore')
import geopandas as gpd
from geopandas.tools import overlay
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
import seaborn as sns
import random
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, mean_squared_error
#import tensorflow as tf
#from keras.models import Sequential
#from keras.layers import Dense
#from tensorflow.keras.utils import to_categorical
import time

import os
import sys
# Make the folder at first_folder_path importable (added to sys.path only
# once), then load the project package from it
module_path = os.path.abspath(os.path.join(first_folder_path))
if module_path not in sys.path:
    sys.path.append(module_path)
import aup

## Notebook config

In [None]:
# Projected CRS that every layer is converted to when it is not already in it
projected_crs = "EPSG:32613"

# When True, the final edges layer is written to disk by the last cell
local_save = False

## Load data

### __Load data__ - Nodes and edges

In [None]:
#Previous
#G, nodes, edges = aup.graph_from_hippo(hex_gdf, schema='osmnx', edges_folder='edges_elevation_23_line', nodes_folder='nodes_osmnx_23_point')

In [None]:
#Previous
#nodes = nodes.reset_index()
#nodes.head(2)

In [None]:
#Previous
#edges = edges.reset_index()
#edges.head(2)

In [None]:
#Previous
#edges_var = edges.copy()
#edges_var = edges_var.reset_index()
#edges_var = edges_var.to_crs("EPSG:32613")
#edges_var = edges_var[['u','v','key','osmid','grade_abs','geometry']] # Already contains the slopes (grade_abs).

In [None]:
# Load the network nodes and keep only the columns of interest
nodes_path = first_folder_path + "data/processed/volvo/slope_to_edges_gdl/nodes_elev_mde.gpkg"
nodes_cols = ['osmid', 'x', 'y', 'street_count', 'elevation', 'geometry']
nodes = gpd.read_file(nodes_path)[nodes_cols]

# Reproject only when the file is not already in the working CRS
if nodes.crs != projected_crs:
    nodes = nodes.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Quick look at the result
print(nodes.shape)
nodes.head(2)

In [None]:
# Load the network edges (they already carry the absolute grade) and keep
# only the edge identifiers, the grade and the geometry
edges_path = first_folder_path + "data/processed/volvo/slope_to_edges_gdl/edges_elev_mde.gpkg"
edges_cols = ['u', 'v', 'key', 'grade_abs', 'geometry']
edges = gpd.read_file(edges_path)[edges_cols]

# Reproject only when the file is not already in the working CRS
if edges.crs != projected_crs:
    edges = edges.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Quick look at the result
print(edges.shape)
edges.head(2)

## Add __NDVI__ to analysis

In [None]:
# Database location of the NDVI hexagon table
ndvi_schema = "raster_analysis"
ndvi_table = "ndvi_analysis_hex"

# Resolution-11 NDVI hexagons for Guadalajara
query = f"SELECT * FROM {ndvi_schema}.{ndvi_table} WHERE city IN ('Guadalajara') AND res = 11"
ndvi_gdl = aup.gdf_from_query(query, geometry_col='geometry')

# Bring the hexagons to the working CRS when needed
if ndvi_gdl.crs != projected_crs:
    ndvi_gdl = ndvi_gdl.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Quick look at the result
print(ndvi_gdl.shape)
ndvi_gdl.head(2)

In [None]:
# Spatial overlay: intersect the edges with the NDVI hexagons so that every
# resulting piece carries the attributes of the hexagon it falls in
edges_ndvi = edges.overlay(ndvi_gdl, how='intersection')

# Quick look at the result
print(edges_ndvi.crs)
print(edges_ndvi.shape)
edges_ndvi.head(2)

In [None]:
# Columns that together identify one street segment
edge_id_cols = ['u', 'v', 'key']

# Mean NDVI of all the pieces that belong to the same street segment
ndvi_per_edge = edges_ndvi.groupby(edge_id_cols, as_index=False)['ndvi_mean'].mean()

# Attach the aggregated NDVI to the original edges (left join keeps every edge)
edges_var_1 = edges.merge(ndvi_per_edge, on=edge_id_cols, how='left')

# Edges that received no NDVI value are set to 0
edges_var_1['ndvi_mean'] = edges_var_1['ndvi_mean'].fillna(0)

# Quick look at the result, including remaining nulls per column
print(edges_var_1.shape)
print(edges_var_1.isna().sum())
edges_var_1.head(2)

In [None]:
# Map of the edges coloured by their mean NDVI
edges_var_1.plot(column="ndvi_mean")

## Add __Land Use__ to analysis

In [None]:
# Load the land-use parcels
land_use_path = first_folder_path + "data/processed/volvo/uso_suelo_gdl/predios_uso_suelo.geojson"
land_use = gpd.read_file(land_use_path)

# Bring the parcels to the working CRS when needed
if land_use.crs != projected_crs:
    land_use = land_use.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Quick look at the result and at the available land-use classes
print(land_use.crs)
print(land_use.shape)
print(land_use['clasificacion_uso_suelo'].unique())
land_use.head(2)

In [None]:
# Work on a copy so edges_var_1 keeps the original line geometries
edges_buffer = edges_var_1.copy()

# Make sure the layer is in the projected CRS *before* buffering, so the
# buffer distance below is expressed in the units of projected_crs
if edges_buffer.crs != projected_crs:
    edges_buffer = edges_buffer.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Replace every edge line with a buffer of distance 30 around it
edges_buffer['geometry'] = edges_buffer.buffer(30)

# Keep only the parcels classified as 'Residencial' or 'Mixto'
land_use_residencial = land_use[land_use['clasificacion_uso_suelo'].isin(['Residencial', 'Mixto'])].copy()

# Intersect the edge buffers with the selected parcels
intersections_residencial = gpd.overlay(edges_buffer, land_use_residencial, how='intersection')

# Quick look at the result
print(intersections_residencial.crs)
print(intersections_residencial.shape)
intersections_residencial.head(2)

In [None]:
# Columns that together identify one street segment
edge_id_cols = ['u', 'v', 'key']

if len(intersections_residencial) == 0:
    print("No hay intersecciones. Verifica las geometrías y los CRS.")
    # Still create the column so later cells that read 'entropy' keep working
    edges_buffer['entropy'] = 0.0
else:
    # Area of every (edge buffer x land-use parcel) intersection
    intersections_residencial['area_inte'] = intersections_residencial.geometry.area

    # Total intersected area per street segment
    area_usos_edges = intersections_residencial.groupby(edge_id_cols)['area_inte'].sum()

    # Intersected area per street segment and land-use class
    land_use_presence_edges = intersections_residencial.groupby(
        edge_id_cols + ['clasificacion_uso_suelo'])['area_inte'].sum()

    # Share of each land-use class within its segment. The per-segment total
    # is broadcast with transform() so both operands share the same index.
    percentage_land_use_edges = (
        land_use_presence_edges
        / land_use_presence_edges.groupby(level=edge_id_cols).transform('sum')
    )

    print("Número de intersecciones:", len(intersections_residencial))
    # One row per segment in area_usos_edges, so its length is the number of
    # distinct (u, v, key) combinations that intersect at least one parcel
    print("Número de calles únicas con intersecciones:", len(area_usos_edges))

    # Number of distinct land-use classes found around each segment
    n_land_use = intersections_residencial.groupby(edge_id_cols)['clasificacion_uso_suelo'].unique().apply(len)

    # Shannon entropy per segment: H = sum(p * log2(1 / p)).
    # Zero shares are dropped first (log2(0) is undefined and they add nothing);
    # a segment with a single class has p = 1 and therefore H = 0.
    positive_shares = percentage_land_use_edges[percentage_land_use_edges > 0]
    entropy_per_edge = (
        (positive_shares * np.log2(1.0 / positive_shares))
        .groupby(level=edge_id_cols)
        .sum()
    )

    # Look up the entropy of every edge; edges without intersections get 0
    edge_index = pd.MultiIndex.from_frame(edges_buffer[edge_id_cols])
    edges_buffer['entropy'] = entropy_per_edge.reindex(edge_index).fillna(0).to_numpy()

# Keep a WKT text copy of each buffer geometry alongside the results
edges_buffer['buffer_wkt'] = edges_buffer['geometry'].apply(lambda geom: geom.wkt)

In [None]:
# Inspect the distinct entropy values and the resulting table
print(edges_buffer.entropy.unique())
print(edges_buffer.shape)
edges_buffer.head(2)

In [None]:
# Snapshot of the edges after the land-use step (geometry column holds the
# buffers assigned to edges_buffer, not the original lines)
edges_var_2 = edges_buffer.copy()

# Quick look at the result
print(edges_var_2.crs)
print(edges_var_2.shape)
edges_var_2.head(2)

## Sidewalks

In [None]:
# Hexagon type and city used to filter the hexgrid table
hexs_type = "urban"
city = "Guadalajara"

# SQL selecting the hexagons of that city and type
query_urban = f"SELECT * FROM hexgrid.hexgrid_8_city_2020 WHERE \"city\" LIKE '{city}' AND \"type\" LIKE '{hexs_type}'"
query_urban

In [None]:
# Run the hexgrid query and load the result as a GeoDataFrame
hex_gdf = aup.gdf_from_query(query_urban, geometry_col='geometry')

# Quick look at the result
print(hex_gdf.shape)
hex_gdf.head(2)

In [None]:
# Build one WKT polygon from the city's hexgrid: buffer each hexagon by 1 in
# the projected CRS, go back to EPSG:4326 and dissolve everything together
gdf_tmp = (
    hex_gdf.copy()
    .to_crs(projected_crs)
    .buffer(1)
    .reset_index()
    .rename(columns={0: 'geometry'})
    .set_geometry("geometry")
    .to_crs("EPSG:4326")
)
poly_wkt = gdf_tmp.dissolve().geometry.to_wkt()[0]

# Load the block-front lines that intersect that polygon
query = f"SELECT * FROM urbano.fm_inegi_20_line WHERE ST_Intersects(geometry, 'SRID=4326;{poly_wkt}')"
sidewalks = aup.gdf_from_query(query, geometry_col='geometry')

# Bring the lines to the working CRS when needed
if sidewalks.crs != projected_crs:
    sidewalks = sidewalks.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Quick look at the result
print(sidewalks.crs)
print(sidewalks.shape)
sidewalks.head(4)

In [None]:
# Explore data: list the columns available in the sidewalks layer
sidewalks.columns

In [None]:
# List the distinct BANQUETA codes present in the loaded data
print(sidewalks.BANQUETA.unique())

In [None]:
# BANQUETA codes, as listed in INEGI's "Descriptores de la base de datos
# frentes de manzana INV2020" file:
#   1 = Dispone                (available)
#   3 = No dispone             (not available)
#   7 = Conjunto habitacional  (housing complex)
#   8 = No Aplica              (not applicable)
#   9 = No especificado        (not specified)

# Keep every code except 9 (not specified)
banqueta_codes_with_data = [1, 3, 7, 8]
has_data = sidewalks['BANQUETA'].isin(banqueta_codes_with_data)
sidewalks_withdata = sidewalks[has_data].copy()

In [None]:
# Check which geometry types are present in the filtered sidewalks layer
sidewalks_withdata.geom_type.unique()

In [None]:
# Start from the edges produced by the land-use step
edges_buffer = edges_var_2.copy()

# Those geometries already hold the 30 m buffers built for the land-use step;
# buffering them a second time would widen the search area to 60 m. Only
# geometries that are still lines are buffered here.
line_mask = edges_buffer.geom_type.isin(['LineString', 'MultiLineString'])
if line_mask.any():
    edges_buffer.loc[line_mask, 'geometry'] = edges_buffer.loc[line_mask, 'geometry'].buffer(30)

# Intersect sidewalks with each edge's 30 meter buffer
intersections = sidewalks_withdata.overlay(edges_buffer, how='intersection')
# Add length data to each intersected sidewalk
intersections['length'] = intersections.length

# Show
print(intersections.shape)
intersections.head(2)

In [None]:
# Total intersected sidewalk length per edge (u, v, key) and BANQUETA code
banqueta_edges = (
    intersections[['u', 'v', 'key', 'BANQUETA', 'length']]
    .groupby(['u', 'v', 'key', 'BANQUETA'])
    .sum()
    .reset_index()
)

# Quick look at the result
print(banqueta_edges.shape)
banqueta_edges.head(2)

In [None]:
# Columns carried over from the per-code length table
edge_id_cols = ['u', 'v', 'key']
length_cols = edge_id_cols + ['length']

# Lengths where a sidewalk is available (code 1) and where it is not
# (codes 3, 7 or 8; an edge can have one row per code here)
with_sidewalk = banqueta_edges.loc[banqueta_edges.BANQUETA == 1, length_cols]
without_sidewalk = banqueta_edges.loc[banqueta_edges.BANQUETA.isin([3, 7, 8]), length_cols]

# Third variable: attach both lengths to a copy of the previous edges
edges_var_3 = edges_var_2.copy()
edges_var_3 = (
    edges_var_3
    .merge(with_sidewalk, on=edge_id_cols, how='left')
    .rename(columns={'length': 'sidewalk_available'})
)
edges_var_3 = (
    edges_var_3
    .merge(without_sidewalk, on=edge_id_cols, how='left')
    .rename(columns={'length': 'no_sidewalk_available'})
)

# Quick look at the result
print(edges_var_3.crs)
print(edges_var_3.shape)
edges_var_3.head(2)

In [None]:
# Columns that together identify one street segment
edge_id_cols = ['u', 'v', 'key']

# "No sidewalk" lengths come from several BANQUETA codes, so an edge can
# appear on several rows: add them up into a single value per edge.
# (Edges without any such length end up with a sum of 0.)
nosidewalk_group = (
    edges_var_3
    .groupby(edge_id_cols, as_index=False)['no_sidewalk_available']
    .sum()
)

# Keep one row per edge and attach the summed "no sidewalk" length.
# Deduplicating on the edge id avoids comparing geometry objects.
edges_var_3_fixed = (
    edges_var_3
    .drop(columns=['no_sidewalk_available'])
    .drop_duplicates(subset=edge_id_cols)
    .merge(nosidewalk_group, on=edge_id_cols)
)

# Share of sidewalk length that is available. A missing available length is
# counted as 0 so that an edge with only "no sidewalk" frontages gets 0
# instead of NaN; edges with no frontage data at all stay NaN (0 / 0).
available_length = edges_var_3_fixed['sidewalk_available'].fillna(0)
total_length = available_length + edges_var_3_fixed['no_sidewalk_available']
edges_var_3_fixed['banqueta_pct'] = available_length / total_length

# Show
print(edges_var_3_fixed.crs)
print(edges_var_3_fixed.shape)
edges_var_3_fixed.head(2)

In [None]:
# Map of the edges coloured by their share of available sidewalk
edges_var_3_fixed.plot(column='banqueta_pct')

In [None]:
# Archived earlier implementation of the sidewalk share, kept as text only:
# the string is assigned but never executed.
previous = """
if intersections.empty:
    print("No hay intersecciones. Saltando...")
    edges_buffer['sidewalk'] = 0  
else:
    # Asegurar que ambos GeoDataFrames tienen el mismo CRS
    if edges_buffer.crs != projected_crs:
        edges_buffer = edges_buffer.to_crs(projected_crs)
        print(f"Changed edges_buffer crs to {projected_crs}.")
    
    if intersections.crs != projected_crs:
        intersections = intersections.to_crs(projected_crs)
        print(f"Changed intersections crs to {projected_crs}.")

    # Inicializar la columna 'sidewalk' con 0
    edges_buffer['sidewalk'] = 0  

    total_count = len(edges_buffer)
    progress_logs = [1,2,3,4,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100] # for log statistics
    i = 1
    start_time = time.time()
    
    # Iterar sobre cada buffer en edges_buffer
    for idx, edge in edges_buffer.iterrows():
        
        # Measures current progress, prints if passed a checkpoint of progress_logs list.
        current_progress = (i / total_count)*100
        for checkpoint in progress_logs:
            if current_progress >= checkpoint:

                checkpoint_time = time.time()
                time_since = checkpoint_time - start_time
                print(f'Calculating length of available sidewalks. {checkpoint}% done. Time so far: {round(time_since,0)}s.')
                progress_logs.remove(checkpoint)
                break
        
        # Filtrar las banquetas contenidas en el buffer
        sidewalks_in_buffer = intersections[intersections['geometry'].within(edge['geometry'])]
        
        # Calcular la longitud total de banquetas dentro del buffer
        total_length = sidewalks_in_buffer['geometry'].length.sum()
        
        # Calcular la longitud de banquetas con BANQUETA = 1
        length_with_sidewalks = sidewalks_in_buffer[sidewalks_in_buffer['BANQUETA'] == 1]['geometry'].length.sum()
        
        # Evitar división por cero y calcular la proporción de banquetas
        if total_length > 0:
            edges_buffer.at[idx, 'sidewalk'] = length_with_sidewalks / total_length
        else:
            edges_buffer.at[idx, 'sidewalk'] = 0  # Si no hay banquetas en el buffer, asignar 0

        i+=1

# Convertir a CRS original y guardar resultados
edges_buffer = edges_buffer.to_crs('epsg:4326')
#edges_buffer.to_file('sidewalks_edges.geojson', driver='GeoJSON')"""

## Population density to edges

In [None]:
# Archived earlier approach, kept as text only (the string is never executed):
# it queried the census node table for the osmids found in the edges.
prev="""
nodes_id = edges_var.v.unique().tolist()
u = edges_var.u.unique().tolist()
nodes_id.extend(u)
myset = set(nodes_id)
nodes_id = list(myset)
schema = 'censo'
nodes_folder = 'pobcenso_inegi_20_mzaageb_node'
nodes_query = f"SELECT * FROM {schema}.{nodes_folder} WHERE osmid IN {str(tuple(nodes_id))}"
nodes_pop = aup.gdf_from_query(nodes_query, geometry_col="geometry", index_col="osmid")
"""

In [None]:
# Archived earlier approach, kept as text only (the string is never executed):
# it loaded the voronoi polygons and computed their area in hectares.
prev="""
voronoi = gpd.read_file('/home/jovyan/accesibilidad-urbana/data/external/Guadalajara_script22_voronoipolys.gpkg')
voronoi = voronoi.to_crs("EPSG:32613")
voronoi['area_ha'] = voronoi.area/10000
"""

In [None]:
# Archived earlier approach, kept as text only (the string is never executed):
# it merged the voronoi area onto the nodes by "osmid".
prev="""
# Realiza el merge para unir los datos de población y área desde `voronoi_gdf` a `nodes_gdf` basado en "osmid"
nodes_pop = nodes_pop.merge(
    voronoi[['osmid', 'area_ha']],  # Selecciona solo las columnas necesarias de voronoi_gdf
    on='osmid',
    how='left'
    )

# Verifica las primeras filas para asegurarte de que el merge se realizó correctamente
nodes_pop.head(3)
"""

In [None]:
# Archived earlier approach, kept as text only (the string is never executed):
# it computed density as total population divided by area in hectares.
prev="""
nodes_pop['dens'] = nodes_pop['pobtot'] / nodes_pop['area_ha']
nodes_pop.head(3)
"""

In [None]:
# Load the layer with population data per node (osmid)
nodes_pop_path = first_folder_path + "data/processed/volvo/project_network_initial_guadalajara/guadalajara_voronois_pop_gdf_b.gpkg"
nodes_pop = gpd.read_file(nodes_pop_path)

# Bring the layer to the working CRS when needed
if nodes_pop.crs != projected_crs:
    nodes_pop = nodes_pop.to_crs(projected_crs)
    print(f"Changed crs to {projected_crs}.")

# Quick look at the result
print(nodes_pop.shape)
nodes_pop.head(2)

In [None]:
# Fourth variable: start from the edges with sidewalk data
edges_var_4 = edges_var_3_fixed.copy()

# Population and density per node, looked up by osmid
node_pop_cols = nodes_pop[['osmid', 'pobtot', 'dens_pob_ha']]

# Attach the node values twice: once for the edge's 'u' node and once for its
# 'v' node, suffixing the new columns with the node they come from
for node_col in ('u', 'v'):
    edges_var_4 = (
        edges_var_4
        .merge(node_pop_cols, left_on=node_col, right_on='osmid', how='left')
        .rename(columns={'pobtot': f'pobtot_{node_col}',
                         'dens_pob_ha': f'dens_{node_col}'})
        .drop(columns='osmid')
    )

# Quick look at the result
edges_var_4.head(2)

In [None]:
# Per-edge values derived from the two end nodes.
# The population sum (and therefore its average) is NaN when either node has
# no population value, whereas the density mean skips missing values.
pobtot_both_nodes = edges_var_4['pobtot_u'] + edges_var_4['pobtot_v']
edges_var_4["average_population"] = pobtot_both_nodes / 2
edges_var_4["total_pobtot"] = pobtot_both_nodes
edges_var_4['average_density'] = edges_var_4[['dens_u', 'dens_v']].mean(axis=1)

# Quick look at the result
edges_var_4.head(2)

In [None]:
# Attribute columns to keep in the final layer. The geometry is taken from the
# original edges instead, because edges_var_4's geometries contain the buffers.
edges_var_df_cols = [
    'u', 'v', 'key',
    'grade_abs',
    'ndvi_mean', 'entropy',
    'sidewalk_available', 'no_sidewalk_available', 'banqueta_pct',
    'pobtot_u', 'pobtot_v', 'total_pobtot', 'average_population',
    'dens_u', 'dens_v', 'average_density',
]

# Join the attributes with the original line geometries by edge id
edge_lines = edges[['u', 'v', 'key', 'geometry']]
edges_var_final = edges_var_4[edges_var_df_cols].merge(edge_lines, on=['u', 'v', 'key'])
edges_var_final = gpd.GeoDataFrame(edges_var_final, geometry='geometry', crs=projected_crs)

# Quick look at the result: type, geometry types, CRS and size
print(type(edges_var_final))
print(edges_var_final.geom_type.unique())
print(edges_var_final.crs)
print(edges_var_final.shape)
edges_var_final.head(2)

In [None]:
# Write the final edges layer to disk only when local saving is enabled
if local_save:
    output_path = first_folder_path + "data/processed/volvo/edges_var_gdl/edges_var_final.gpkg"
    edges_var_final.to_file(output_path)