# Script 27 full network nearest data

Similar to notebook 28d, but using the OSMnx network with pje_ep (public space quality index) data from Santiago's team (preprocessed in script 31a).

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from shapely.geometry import Point
import osmnx as ox

from tqdm import tqdm
import h3

import os
import sys
# Make the repository root importable so the project-local `aup` package can be found.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
# Import unconditionally: when the path is already registered (e.g. through PYTHONPATH)
# the import must still run, otherwise `aup` would never be bound in this kernel.
import aup

## Notebook config

In [4]:
# ======================= Directories =======================
# Root folder of the Santiago inputs (relative to this notebook) and its sub-folders.
gral_dir = '../../../data/external/santiago/'
all_pois_dir = f"{gral_dir}pois/"
output_dir = f"{gral_dir}nearest/"

# ======================= Project data (complete network) =======================
project_name, p_code = 'red_completa', '00'

# ======================= Area of interest =======================
aoi_schema, aoi_table = 'projects_research', 'santiago_aoi'
city = 'alamedabuffer_4500m'

# ======================= Network =======================
# Nodes and edges preprocessed on Notebook 31a.
network_schema = 'projects_research'
nodes_table, edges_table = 'santiago_nodes', 'pje_ep_edges'

# ======================= Network preparation =======================
# line_id values belonging to a nonexistent footpath in the Parque Bueras project.
delete_ids = [
    51047, 51048, 54031, 54032,
    87638, 89508, 89512, 89513,
]
project_ids = []
filtering_column, filtering_value = 'pje_ep', 0.5

# ======================= Projection =======================
projected_crs = "EPSG:32719"

# ======================= Run switches =======================
calculate_nearest = True  # Calculate pois nearest? (Takes time)
save = True               # Save pois nearest to local?

# -----------------------

# Sources (pois), grouped by theme. Inside each theme the sub-lists follow the order
# wellbeing, sociability, environmental impact. Bracketed notes record the areal-data
# entries of a category, which are not part of this list.
source_list = (
    # --- supplying
    ['carniceria', 'hogar', 'bakeries', 'supermercado', 'banco']
    + ['ferias', 'local_mini_market', 'correos']
    + ['centro_recyc']
    # --- caring [environmental impact, areal data: 'noise','temp']
    + ['hospital_priv', 'hospital_pub', 'clinica_priv', 'clinica_pub', 'farmacia',
       'vacunatorio_priv', 'vacunatorio_pub', 'consult_ado_priv', 'consult_ado_pub',
       'salud_mental', 'labs_priv', 'residencia_adumayor']
    + ['eq_deportivo_priv', 'eq_deportivo_pub', 'club_deportivo']
    # --- living [sociability, areal data: 'houses','social_viv','hotel']
    #            [environmental impact, areal data: 'inter']
    + ['civic_office', 'tax_collection', 'social_security', 'police', 'bomberos']
    # --- enjoying [wellbeing, areal data: 'ndvi']
    + ['museos_priv', 'museos_pub', 'cines', 'sitios_historicos']
    + ['restaurantes_bar_cafe', 'librerias', 'ep_plaza_small']
    + ['ep_plaza_big']
    # --- learning
    + ['edu_basica_pub', 'edu_media_pub', 'jardin_inf_pub', 'universidad', 'edu_tecnica']
    + ['edu_adultos_pub', 'edu_especial_pub', 'bibliotecas']
    + ['centro_edu_amb']
    # --- working [sociability, areal data: 'oficinas']
    + ['paradas_tp_ruta', 'paradas_tp_metro', 'paradas_tp_tren']
    + ['ciclovias', 'estaciones_bicicletas']
)

# Sources that go through the extra grouping step of the nearest calculation;
# `goi_id` is the name of the identifier column used in that grouping.
unique_id_sources = ['ferias', 'ep_plaza_small', 'ep_plaza_big', 'ciclovias']
goi_id = 'ID'

## G, nodes and edges for full network

In [5]:
# ---- Area of interest (aoi)
print("--- Downloading area of interest.")
query = 'SELECT * FROM {}.{} WHERE "city" LIKE \'{}\''.format(aoi_schema, aoi_table, city)
aoi = aup.gdf_from_query(query, geometry_col='geometry').set_crs("EPSG:4326")

# ---- Project network
# graph_from_hippo also brings nodes that lie outside the aoi but are connected to one of its edges.
print("--- Loading project network.")
G, nodes, edges = aup.graph_from_hippo(aoi, network_schema, edges_table, nodes_table, projected_crs)
nodes, edges = nodes.reset_index(), edges.reset_index()
print(f"Loaded file with {len(edges)} edges.")
print(f"Loaded file with {len(nodes)} nodes.")

# ---- Edge clean-up
# Drop the lines listed in delete_ids
is_deleted = edges['line_id'].isin(delete_ids)
edges_f = edges.loc[~is_deleted].copy()
print("--- Deleted unaccurate footpath.")
# Push project lines just above the threshold so that the filter below always keeps them
idx = edges_f['line_id'].isin(project_ids)
edges_f.loc[idx, filtering_column] = filtering_value + 0.01
# The complete network keeps every edge; any other project is filtered by filtering_value
if project_name == 'red_completa':
    edges_filt = edges_f.copy()
    print("--- Copied network's edges without filtering.")
else:
    passes_filter = edges_f[filtering_column] >= filtering_value
    edges_filt = edges_f.loc[passes_filter].copy()
    print("--- Filtered project network's edges by filtering value.")

# ---- Node clean-up: keep only nodes that are an endpoint (u or v) of a remaining edge
myset = set(edges_filt['v'].unique()) | set(edges_filt['u'].unique())
osmids_lst = list(myset)
nodes = nodes.loc[nodes['osmid'].isin(osmids_lst)]
print(f"--- Filtered nodes using edges, kept {len(nodes)} nodes.")

# ---- Rebuild the graph from the cleaned tables, indexed the way osmnx expects
nodes_gdf = nodes.set_index('osmid')
edges_gdf = edges_filt.set_index(['u', 'v', 'key'])
G = ox.graph_from_gdfs(nodes_gdf, edges_gdf)
nodes, edges = nodes_gdf.copy(), edges_gdf.copy()
print(f"--- Created G network with {len(edges)} edges and {len(nodes)} nodes.")

--- Downloading area of interest.
--- Loading project network.
Loaded file with 114168 edges.
Loaded file with 43734 nodes.
--- Deleted unaccurate footpath.
--- Copied network's edges without filtering.
--- Filtered nodes using edges, kept 43734 nodes.
--- Created G network with 114160 edges and 43734 nodes.


In [6]:
# Show: print the (rows, columns) shape of the nodes table and display its first row.
print(nodes.shape)
nodes.head(1)

(43734, 4)


Unnamed: 0_level_0,x,y,street_count,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
386138,-70.646254,-33.442901,4,POINT (-70.64625 -33.44290)


In [7]:
# Show: print the (rows, columns) shape of the edges table and display its first row.
print(edges.shape)
edges.head(1)

(114160, 17)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,line_id,pje_ep,osmid,oneway,lanes,name,highway,maxspeed,length,bridge,ref,junction,tunnel,access,width,service,geometry
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
13879360,454636340,0,0,0.893827,446008556,False,1,Aliro Correa,living_street,,41.855,,,,,,,,"LINESTRING (-70.73850 -33.49002, -70.73804 -33..."


## Calculate and save nearest

In [10]:
if calculate_nearest:
    # Maximum poi-to-node distance kept for unique-ID sources: 80 meters (aprox. 1 minute),
    # i.e. a 1 minute additional walk is considered acceptable
    # (if goi is inside a park, e.g. a bike lane).
    max_node_distance = 80

    # Make sure the output folder exists before the (slow) loop starts writing into it.
    if save:
        os.makedirs(output_dir, exist_ok=True)

    for source in source_list:
        print(f"Calculating and saving nearest for {source}.")
        # File where the pois of the current source are located
        pois_dir = all_pois_dir + f'{source}.gpkg'
        # Load all pois from that file
        pois = gpd.read_file(pois_dir)
        # Bring pois to EPSG:4326. Files without a CRS cannot be reprojected, so they are
        # only tagged (assumes their coordinates already are EPSG:4326); any other file is
        # reprojected. Checking the CRS explicitly avoids hiding unrelated errors.
        if pois.crs is None:
            pois = pois.set_crs("EPSG:4326")
        else:
            pois = pois.to_crs("EPSG:4326")
        # Keep the pois that spatially join with the area of interest
        source_pois = gpd.sjoin(pois, aoi)
        # Find nearest network node for each poi; the result carries the
        # 'osmid' and 'distance_node' columns used below.
        nearest = aup.find_nearest(G, nodes, source_pois, return_distance= True)
        nearest = nearest.set_crs("EPSG:4326")

        if source in unique_id_sources:
            # One row per (node, feature ID) pair, keeping only the shortest distance.
            # The string 'min' replaces np.min, which pandas deprecated as an agg argument.
            grouped = nearest.groupby(['osmid', goi_id]).agg({'distance_node': 'min'})
            grouped = grouped.reset_index()

            # Turn back into a gdf by merging with the nodes geometry
            geom_gdf = nodes.reset_index()[['osmid', 'geometry']]
            nearest = pd.merge(grouped, geom_gdf, on='osmid', how='left')
            nearest = gpd.GeoDataFrame(nearest, geometry="geometry")

            # Discard assignments farther away than the accepted extra walk
            nearest = nearest.loc[nearest.distance_node <= max_node_distance]

        # Save nearest
        if save:
            nearest.to_file(output_dir + f"nearest_{source}.gpkg", driver='GPKG')

Calculating and saving nearest for carniceria.
Calculating and saving nearest for hogar.
Calculating and saving nearest for bakeries.
Calculating and saving nearest for supermercado.
Calculating and saving nearest for banco.
Calculating and saving nearest for ferias.
Calculating and saving nearest for local_mini_market.
Calculating and saving nearest for correos.
Calculating and saving nearest for centro_recyc.
Calculating and saving nearest for hospital_priv.
Calculating and saving nearest for hospital_pub.
Calculating and saving nearest for clinica_priv.
Calculating and saving nearest for clinica_pub.
Calculating and saving nearest for farmacia.
Calculating and saving nearest for vacunatorio_priv.
Calculating and saving nearest for vacunatorio_pub.
Calculating and saving nearest for consult_ado_priv.
Calculating and saving nearest for consult_ado_pub.
Calculating and saving nearest for salud_mental.
Calculating and saving nearest for labs_priv.
Calculating and saving nearest for resi

# Problem solving [Solved]

__Error appearing on Script 27:__

Traceback (most recent call last):
  File "/home/jovyan/accesibilidad-urbana/scripts/27-compiler_hqsl.py", line 1103, in <module>
    main(source_list, aoi, G, nodes, edges, walking_speed, local_save,santiago_tmp_fix)
  File "/home/jovyan/accesibilidad-urbana/scripts/27-compiler_hqsl.py", line 495, in main
    source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source,'length',walking_speed, 
  File "/home/jovyan/accesibilidad-urbana/aup/analysis.py", line 938, in pois_time
    nodes_distance_prep = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght='time_min',count_pois=count_pois)
  File "/home/jovyan/accesibilidad-urbana/aup/analysis.py", line 93, in calculate_distance_nearest_poi
    seeds = get_seeds(gdf_f, node_mapping, column_name)
  File "/home/jovyan/accesibilidad-urbana/aup/utils.py", line 252, in get_seeds
    return np.array(list(set([node_mapping[i] for i in gdf[column_name]])))
  File "/home/jovyan/accesibilidad-urbana/aup/utils.py", line 252, in <listcomp>
    return np.array(list(set([node_mapping[i] for i in gdf[column_name]])))
KeyError: 3482706297146

### Creating red_buena_calidad network

In [8]:
# Edges are kept when their filtering_column value is equal to or greater than filtering_value
filtering_column, filtering_value = 'pje_ep', 0.5

# ---- Load the red_buena_calidad edges and nodes files, tagging both with the projected CRS
quality_dir = gral_dir + 'calidad_ep/'
edges_file = gpd.read_file(quality_dir + 'red_buena_calidad_single_parts.gpkg').set_crs(projected_crs)
nodes_file = gpd.read_file(quality_dir + 'red_buena_calidad_nodes.shp').set_crs(projected_crs)

# ---- Create the navigable network and keep a single row per osmid
nodes, edges = aup.create_network(nodes_file, edges_file, projected_crs)
nodes = nodes.drop_duplicates(subset=['osmid'])

# ---- Filter edges by the quality threshold
meets_threshold = edges[filtering_column] >= filtering_value
edges_filt = edges.loc[meets_threshold]

# ---- Index both tables for osmnx; nodes also get x/y columns taken from their geometry
nodes_gdf = nodes.set_index('osmid').assign(
    x=lambda gdf: gdf['geometry'].x,
    y=lambda gdf: gdf['geometry'].y,
)
edges_gdf = edges_filt.set_index(['u', 'v', 'key'])

# ---- Create G and rename nodes and edges
G = ox.graph_from_gdfs(nodes_gdf, edges_gdf)
nodes, edges = nodes_gdf.copy(), edges_gdf.copy()

In [9]:
# Show: print the (rows, columns) shape of the nodes table and display its first row.
print(nodes.shape)
nodes.head(1)

(14537, 12)


Unnamed: 0_level_0,fid,Nom_Rut,pje_ep,vertex_pos,vertex_ind,vertex_par,vertex_p_1,distance,angle,geometry,x,y
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3509156302582,1.0,Costanera Sur,0.549756,0,0,0,0,0.0,191.098679,POINT (-70.60322 -33.40575),-70.603217,-33.405746


In [10]:
# Show: print the (rows, columns) shape of the edges table and display its first row.
print(edges.shape)
edges.head(1)

(15771, 4)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Nom_Rut,pje_ep,geometry,length
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3509156302582,3509786302390,0,Costanera Sur,0.549756,"LINESTRING (-70.60322 -33.40575, -70.60328 -33...",229.719497


### The error on script 27 states "KeyError: 3482706297146".

Looking for that key in the full network (full_test) and in the filtered network (red_buena_calidad) reveals that the node is in the full network but not in the filtered network. Therefore, the nodes referenced by nearest cannot differ from the nodes used in calculate_distance_nearest_poi. Further adaptations are needed.

In [11]:
# Look up the osmid reported by the KeyError among the full network's nodes.
# NOTE(review): `full_nodes` is not assigned in any cell shown in this notebook; it presumably
# came from a cell that was later removed. Confirm its source and restore it so that this cell
# runs on a fresh kernel.
full_test = full_nodes.reset_index()
full_test.loc[full_test.osmid == 3482706297146]

Unnamed: 0,osmid,fid,IDcllbf,shp_lng,st_lng_,Nom_Rut,Rol_Mop,Clas_Rt,Tip_Crp,Catego,...,buffer800,vertex_pos,vertex_ind,vertex_par,vertex_p_1,distance,angle,geometry,x,y
15078,3482706297146,18680.0,18665.0,65.93577,65.93577,San Camilo,,9,,Urbano,...,0.0,-1,1,0,1,65.93577,346.067342,POINT (-70.63257 -33.45438),-70.632573,-33.45438


In [12]:
# Same lookup against the current `nodes` table (the filtered red_buena_calidad network).
filtered_test = nodes.reset_index()
filtered_test.loc[filtered_test.osmid == 3482706297146]

Unnamed: 0,osmid,fid,Nom_Rut,pje_ep,vertex_pos,vertex_ind,vertex_par,vertex_p_1,distance,angle,geometry,x,y


### Error solution: Keep only the rows of the nearest gdf whose osmid is present in the nodes gdf.

In [14]:
# Read back the nearest file of a single source to try the fix on it
poi_name = 'carniceria'
nearest_path = f"{output_dir}nearest_{poi_name}.gpkg"
nearest = gpd.read_file(nearest_path)

# Show: shape first, then the first row
print(nearest.shape)
nearest.head(1)

(228, 20)


Unnamed: 0,rut,dv,vigenci,fecha,tipo_di,calle,numero,bloque,departa,villa_p,ciudad,comuna,region,rzn_scl,sngldrs,index_right,city,osmid,distance_node,geometry
0,50314080,2,N,2001-04-24,SUCURSAL,FRAY CAMILO HENRIQUEZ,988,,,ROL 2836-013,SANTIAGO,SANTIAGO,XIII REGION METROPOLITANA,FERNANDEZ DE CASO EMILIO Y OTROS,"FRAY CAMILO HENRIQUEZ 988, SANTIAGO, Chile",0,alamedabuffer_4500m,3482706297146,25.143867,POINT (-70.63244 -33.45458)


In [15]:
# Show: print the (rows, columns) shape of the nodes table and display its first row.
print(nodes.shape)
nodes.head(1)

(14537, 12)


Unnamed: 0_level_0,fid,Nom_Rut,pje_ep,vertex_pos,vertex_ind,vertex_par,vertex_p_1,distance,angle,geometry,x,y
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3509156302582,1.0,Costanera Sur,0.549756,0,0,0,0,0.0,191.098679,POINT (-70.60322 -33.40575),-70.603217,-33.405746


In [19]:
# Collect the osmids available in the nodes table and keep only the nearest rows assigned to one of them
valid_osmids = nodes.reset_index()['osmid'].unique()
osmid_check_list = list(valid_osmids)
has_valid_node = nearest['osmid'].isin(osmid_check_list)
nearest = nearest.loc[has_valid_node]

# Show: shape first, then the first row
print(nearest.shape)
nearest.head(1)

(144, 20)


Unnamed: 0,rut,dv,vigenci,fecha,tipo_di,calle,numero,bloque,departa,villa_p,ciudad,comuna,region,rzn_scl,sngldrs,index_right,city,osmid,distance_node,geometry
1,51041740,2,N,2008-07-09,SUCURSAL,ARTESANOS,719,,19,,SANTIAGO,RECOLETA,XIII REGION METROPOLITANA,ROJAS RENCORET ROSA PATRICIA Y OTRA,"ARTESANOS 719, RECOLETA, Chile",0,alamedabuffer_4500m,3466426299640,57.085929,POINT (-70.65008 -33.43129)
