In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory two levels above the working directory importable so the
# project-local `aup` package can be found.
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Imported outside the guard on purpose: when module_path is already on sys.path
# the guarded import was skipped, leaving `aup` unbound for every later cell.
import aup

  ox.config(


In [2]:
# Run configuration.
# city: value matched with LIKE against the "city" column of the metro table.
# version: forwarded to get_denue_pois.
# prox_measure: name of the edges column used as the network weight.
city = 'Aguascalientes'
version = 1
prox_measure = 'time_min'

# BASE DATA REQUIRED
# Area of interest (city)
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2015' #'metro_gdf_2015' or 'metro_gdf_2020'
# Network data (nodes and edges table for distance analysis,
# also used to generate the network G with which the nearest OSMID is assigned to each poi)
network_schema = 'osmnx'
nodes_table = 'nodes' #'nodes' or 'nodes_23_point'
edges_table = 'edges_speed' ################################# PENDING
# Points of interest - DENUE
denue_schema = 'denue'
denue_table = 'denue_2020' #'denue_2020' or 'denue_23_point'
# Points of interest - CLUES
clues_schema = 'denue'
clues_table = 'clues' #'clues' or 'clues_23_point'
# Points of interest - SIP
sip_schema = 'denue'
sip_table = 'sip_2020' #'sip_2020' or 'sip_23_point'
# Hexgrid
hex_schema = 'hexgrid'
# Population data
pop_schema = 'censo'
pop_table = 'hex_bins_pop_2020' ################################# PENDING

In [3]:
# Nested lookup: axis ("eje") -> amenity -> source -> list of activity codes.
# The source name prefix (denue_/clues_/sip_) selects where the pois are read from.
parameters = {
    'Escuelas': {
        'Preescolar': {'denue_preescolar': [611111, 611112]},
        # 'Primaria': {'denue_primaria': [611121, 611122]},
        # 'Secundaria': {'denue_secundaria': [611131, 611132]},
    },
}

# Per-amenity rule for combining its sources (axis -> amenity -> rule).
# Must be a dict: the previous trailing comma after the closing brace silently
# wrapped it in a 1-tuple.
source_weight = {
    'Escuelas': {
        'Preescolar': 'max',  # There is only one source, no effect.
        # 'Primaria': 'max',  # There is only one source, no effect.
        # 'Secundaria': 'max',  # There is only one source, no effect.
    },
}

In [4]:
def get_denue_pois(denue_schema,denue_table,poly_wkt,code,version):
    """Fetch the DENUE points of interest of one activity code inside a polygon.

    Parameters
    ----------
    denue_schema, denue_table : str
        Schema and table names interpolated into the SQL query.
    poly_wkt : str
        WKT geometry; rows are kept when ST_Intersects matches it as SRID 4326.
    code : int or str
        Value compared against the "codigo_act" column.
    version : any
        Accepted for call compatibility; this function does not read it.

    Returns
    -------
    The frame produced by aup.gdf_from_query, reduced to the columns
    'code' (int64) and 'geometry'.
    """
    sql = f"SELECT * FROM {denue_schema}.{denue_table} WHERE (ST_Intersects(geometry, \'SRID=4326;{poly_wkt}\')) AND (\"codigo_act\" = \'{code}\')"
    raw_pois = aup.gdf_from_query(sql, geometry_col='geometry')

    # Keep only the activity code and the geometry, exposing the code as 'code'.
    formatted_pois = raw_pois[['codigo_act', 'geometry']].rename(columns={'codigo_act':'code'})
    formatted_pois['code'] = formatted_pois['code'].astype('int64')

    return formatted_pois

In [5]:
# Fetch the metro-area rows whose "city" matches `city`, tag them as EPSG:4326
# and merge them into a single area-of-interest geometry.
query = f'SELECT * FROM {metro_schema}.{metro_table} WHERE "city" LIKE \'{city}\''
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry').set_crs("EPSG:4326")
aoi = mun_gdf.dissolve()

In [6]:
# Download the network (graph G plus its nodes and edges tables) for the area of
# interest; it is used to find the nearest node to each poi.
G, nodes, edges = aup.graph_from_hippo(aoi, schema=network_schema, edges_folder=edges_table, nodes_folder=nodes_table)

In [7]:
# Show the (rows, columns) of the nodes table, then of the edges table
for network_table in (nodes, edges):
    print(network_table.shape)

(51434, 4)
(121037, 19)


In [8]:
# WKT of the dissolved area of interest, used as the spatial filter of the poi queries.
# .iloc[0] takes the first (only) row by position instead of relying on an index label 0.
poly_wkt = aoi.dissolve().geometry.to_wkt().iloc[0]

i = 0  # not used in this cell; kept in case later cells read it
source_list = []

for eje in parameters.keys():
    for amenity in parameters[eje]:
        for source, codes in parameters[eje][amenity].items():

            source_list.append(source)
            # ANALYSIS - Select source points of interest
            # Collect one frame per code and concatenate once, instead of growing
            # a frame with pd.concat on every iteration.
            code_frames = []
            for code in codes:
                #If source is denue:
                if source.startswith('d'):
                    aup.log(f'--- Downloading denue source pois code {code} from db.')
                    code_pois = get_denue_pois(denue_schema,denue_table,poly_wkt,code,version)
                #If source is clues or sip:
                elif source.startswith(('c', 's')):
                    aup.log(f'--- Getting clues/sip source pois code {code} from previously downloaded.')
                    # NOTE(review): sip_clues_gdf is not defined in the cells shown here;
                    # this branch needs it to be loaded beforehand - confirm.
                    code_pois = sip_clues_gdf.loc[sip_clues_gdf['code'] == code]
                else:
                    aup.log(f'--- Error, check parameters dicctionary.')
                    aup.log(f'--- Sources must start with denue_, clues_ or sip_.')
                    # Fail with an explicit error rather than by evaluating an undefined name.
                    raise ValueError(
                        f"Unsupported source '{source}': sources must start with denue_, clues_ or sip_."
                    )

                code_frames.append(code_pois)

            # An amenity source without codes yields an empty frame, as before.
            source_pois = pd.concat(code_frames) if code_frames else gpd.GeoDataFrame()

            # ANALYSIS - Calculate times from nodes to source
            #source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure)

In [9]:
# Show the (rows, columns) of the pois gathered for the last source processed
# by the loop, followed by its first row.
print(source_pois.shape)
source_pois.head(1)

(306, 2)


Unnamed: 0,code,geometry
0,611111,POINT (-102.27464 21.90191)


# STARTS SCRIPT/FUNCTIONS REDEVELOPMENT

In [14]:
# Switch for the exploratory cells below: code wrapped in `if test:` only runs while this is True.
test = True

## DESGLOSE DE LA FUNCIÓN aup.pois_time

In [93]:
### Breakdown of the function aup.pois_time

if test:
    # Goal:
    #source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure)

    # Function used:
    #def pois_time(G, nodes, edges, pois, poi_name, prox_measure):

    # Base data needed:
    # Bind the function's parameter names to notebook globals so its body can be run cell by cell.
    pois = source_pois.copy()
    # NOTE(review): `source` is the loop variable left over from the poi selection
    # loop, i.e. the last source iterated there - confirm that is the intended one.
    poi_name = source

In [94]:
### Breakdown of the function aup.pois_time

if test:
    ##########################################################################################
    # Step 1: NEAREST. Finds and assigns nearest node OSMID to each point of interest.

    # Defines projection for downloaded data
    pois = pois.set_crs("EPSG:4326")
    nodes = nodes.set_crs("EPSG:4326")
    edges = edges.set_crs("EPSG:4326")

    # In case there are no amenities of the type in the city, prevents it from crashing if len = 0
    if len(pois) == 0:
        nodes_time = nodes.copy()
        # Time is set to np.nan (the message used to say 0, which did not match the value)
        nodes_time['time'] = np.nan
        print(f"0 {poi_name} found. Time set to NaN.")
        # Format
        nodes_time['source'] = poi_name
        nodes_time.reset_index(inplace=True)
        nodes_time = nodes_time.set_crs("EPSG:4326")
        nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]
        #return nodes_time

    else:
        ### Calculate nearest node for each DENUE point
        nearest = aup.find_nearest(G, nodes, pois, return_distance= True)
        nearest = nearest.set_crs("EPSG:4326")
        print(f"Found and assigned nearest node osmid to each {poi_name}.")

        ##########################################################################################
        # Step 2: DISTANCE NEAREST POI. Calculates distance from each node to its nearest point of interest.

        # --------------- 2.1 FORMAT NETWORK
        # Fill NANs with mean times.
        # Assign the result back: an in-place fillna on a selected column is a chained
        # assignment, which pandas deprecates and which has no effect under copy-on-write.
        edges[prox_measure] = edges[prox_measure].fillna(edges[prox_measure].mean())

        # --------------- 2.2 ELEMENTS NEEDED OUTSIDE THE LOOP
        # The pois are divided by batches of 200 or 250 pois and analysed using the function calculate_distance_nearest_poi
        # nodes_analysis is a nodes (index reseted) used in the function.
        nodes_analysis = nodes.reset_index().copy()
        # df_temp: Each column will store a batch of procesed nodes.
        df_temp = nodes.copy()
        # nodes_distance: Minimum time/distance found in all batches will be added from df_min (within if/elif/else)
        #                 to nodes_distance (output) keeping x,y and geometry data.
        nodes_distance = nodes.copy()

        # --------------- 2.3 PROCESSING DISTANCE
        print (f"Starting time analysis for {poi_name}.")

        # Batches of 200 pois, or of 250 when the poi count is an exact multiple of 250.
        # (Previously the multiple-of-250 case ran no batch at all, leaving
        # source_process undefined for the following cells.)
        batch_len = 200 if len(nearest) % 250 else 250
        # Ceiling division: avoids a trailing empty batch when the count is an
        # exact multiple of the batch length.
        n_batches = -(-len(nearest) // batch_len)
        for k in range(n_batches):
            print(f"Starting range k = {k+1} of {n_batches} for {poi_name}.")
            source_process = nearest.iloc[batch_len*k:batch_len*(k+1)].copy()
            #nodes_distance_prep = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure)

Found and assigned nearest node osmid to each denue_preescolar.
Starting time analysis for denue_preescolar.
Starting range k = 1 of 2 for denue_preescolar.
Starting range k = 2 of 2 for denue_preescolar.


## DESGLOSE DE LAS FUNCIONES aup.calculate_distance_nearest_poi & aup.get_distances

### 01 - Exploración del código

In [45]:
# Breakdown of the function aup.calculate_distance_nearest_poi

if test:
    # Goal:
    #nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure, get_nearest_poi=(False, 'osmid'))

    # Function used:
    # def calculate_distance_nearest_poi(gdf_f, nodes, edges, amenity_name, column_name, wght='length', get_nearest_poi=(False, 'poi_id_column'), max_distance=(0,'distance_node'))

    # Base data needed:
    # Bind the function's parameter names to notebook globals so its body can be run cell by cell.
    # source_process is the last batch left over from the batching loop.
    gdf_f = source_process.copy()
    # NOTE: this rebinds the global `nodes` to the reset-index copy, so cells
    # above that read `nodes` behave differently if re-run after this one.
    nodes = nodes_analysis.copy()
    amenity_name = poi_name
    column_name = 'osmid'
    wght = prox_measure
    get_nearest_poi=(True, 'osmid')
    max_distance=(0,'distance_node')

In [48]:
# Breakdown of the function aup.calculate_distance_nearest_poi

if test:
    # --- Required processing
    # Indentation is spaces throughout: the code used tabs while the comments used
    # spaces, so uncommenting the block below would have raised a TabError.
    nodes = nodes.copy()
    edges = edges.copy()
    # A positive max_distance[0] keeps only pois whose max_distance[1] column is within it.
    if max_distance[0] > 0:
        gdf_f = gdf_f.loc[gdf_f[max_distance[1]]<=max_distance[0]]
    g, weights, node_mapping = aup.to_igraph(nodes,edges,wght=wght) #convert to igraph to run the calculations
    seeds = aup.get_seeds(gdf_f, node_mapping, column_name)
    voronoi_assignment = aup.voronoi_cpu(g, weights, seeds)

    #if get_nearest_poi[0]: # Return distances and nearest poi idx
        #distances, nearest_poi_idx = get_distances(g,seeds,weights,voronoi_assignment,get_nearest_poi=True)

In [47]:
# Breakdown of the function aup.get_distances

if test:
    # Goal:
    # distances, nearest_poi_idx = get_distances(g,seeds,weights,voronoi_assignment,get_nearest_poi=True)

    # Function used:
    # def get_distances(g, seeds, weights, voronoi_assignment, get_nearest_poi=False):

    # Base data needed:
    get_nearest_poi=True
    # shortest_paths_dijkstra() is deprecated in python-igraph in favour of
    # distances(); use the old name only when the graph object lacks the new one.
    if hasattr(g, 'distances'):
        shortest_paths = np.array(g.distances(seeds, weights=weights))
    else:
        shortest_paths = np.array(g.shortest_paths_dijkstra(seeds, weights=weights))
    # One row per seed; the minimum of column i is the cost from entry i to its closest seed.
    distances = [np.min(shortest_paths[:,i]) for i in range(len(voronoi_assignment))]

  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))


In [44]:
if test:
    # Matrix of interest. It holds the times (one row per seed; indexed by column in the next cells).

    # Show
    print(shortest_paths)

[[113.33214179 115.88226669 378.65469722 ... 442.08203755 446.50099706
  445.02305781]
 [ 78.58497605  76.96433302 333.88721812 ... 411.47954795 415.89850746
  414.42056822]
 [ 85.56306961  88.11319451 350.88562504 ... 386.46582687 390.88478638
  389.40684714]
 ...
 [ 87.06053584  85.43989281 309.5042713  ... 413.90823148 418.32719099
  416.84925174]
 [180.1850379  182.73516281 445.50759333 ... 444.69173697 449.11069648
  447.63275723]
 [101.15011665 102.08379497 360.33744754 ... 429.90001241 434.31897192
  432.84103267]]


In [43]:
if test:
    # Example of the expression used to find the nearest amenity
    # nearest_poi_idx = [np.argmin(shortest_paths[:,i]) for i in range(len(voronoi_assignment))]

    # Show the first column: the value from every seed for index 0
    print(shortest_paths[:,0])

[113.33214179  78.58497605  85.56306961 117.52311897  68.60085329
  51.56463007 170.9175337   75.07727785 223.8291327   58.42020168
 315.31174911 110.61660483 162.72606779 228.3471477   92.96721778
  53.68238175 173.65663025 100.35295246  74.48106063 291.35137183
 370.29557164  42.3794578  204.13221372 230.21278095  55.58135656
 208.22490023 156.06895672  69.85007677 221.16000957  51.39209932
 199.28687143 106.05166432  51.26005005 101.42330262 170.48918077
  50.06991488  61.11640661  54.79656954  81.26428866  65.79941447
  63.5149553  133.22728122  87.22870857 202.90183309 234.88127472
 211.8147678   65.69617246  74.70116732 357.27566147 142.21288107
 105.08072001 119.37030475 101.54756158  90.45926562  99.52946609
  91.3153677  204.08824287 167.11815649 182.94639004  67.71718786
  11.61862373 104.89382074  29.22382747 205.38705709 100.35583427
 216.88135097  80.91884763 215.36086315  66.82910877  27.23033332
 255.72981364 145.04748389 114.25754681  61.03697984  66.07271408
 163.45513

### 02 - Desarrollo del cambio a realizar para que cuente las amenidades a x tiempo (e.g. 15 mins)

In [56]:
### Test of the counting method for the function get_distances

if test:
    # Toy matrix standing in for shortest_paths
    data = np.array([[10, 20],
                     [20, 83],
                     [1, 15]])

    # Test 1 (failed)
    #prueba = np.where( data <= 15 )
    # Test 2 (Success): for every column, how many entries are <= 15
    prueba = [int(np.count_nonzero(data[:, col] <= 15)) for col in range(data.shape[1])]
    # Show
    print(prueba)

[2, 1]


### 03 - Prueba de redefinición de las funciones

In [82]:
def get_distances(g, seeds, weights, voronoi_assignment, get_nearest_poi=False, count_pois = 0):
    """Summarise, per graph entry, the shortest-path costs from a set of seeds.

    Defined unconditionally (it used to sit under `if test:`) because the cells
    that call it are not guarded by that flag.

    Parameters
    ----------
    g : graph object
        Must provide `distances(source, weights=...)` or, on older python-igraph
        versions, `shortest_paths_dijkstra(source, weights=...)`.
    seeds : sequence
        Source vertices; each one produces a row of the cost matrix.
    weights : any
        Forwarded unchanged as the `weights` argument of the graph call.
    voronoi_assignment : sized
        Only its length is used: it sets how many leading columns are summarised.
    get_nearest_poi : bool, default False
        Also return, per column, the row position of the minimum cost.
    count_pois : number, default 0
        When non-zero, also return per column how many seeds have cost <= count_pois
        (same unit as the weights).

    Returns
    -------
    distances : list
        Minimum cost per column.
    (distances, nearest_poi_idx) when only get_nearest_poi is set,
    (distances, near_count) when only count_pois is set,
    (distances, nearest_poi_idx, near_count) when both are set.
    """
    # shortest_paths_dijkstra() is deprecated in python-igraph in favour of
    # distances(); fall back to it only when the newer method is missing.
    if hasattr(g, 'distances'):
        path_matrix = g.distances(seeds, weights=weights)
    else:
        path_matrix = g.shortest_paths_dijkstra(seeds, weights=weights)

    # Rows are seeds. Restrict to the columns the summaries cover, then reduce
    # along the seed axis with vectorised numpy calls instead of per-column loops.
    shortest_paths = np.array(path_matrix)[:, :len(voronoi_assignment)]
    distances = shortest_paths.min(axis=0).tolist()

    want_count = count_pois != 0
    if get_nearest_poi:
        nearest_poi_idx = shortest_paths.argmin(axis=0).tolist()
    if want_count:
        near_count = (shortest_paths <= count_pois).sum(axis=0).tolist()

    # Return options
    if get_nearest_poi and want_count:
        return distances, nearest_poi_idx, near_count
    if get_nearest_poi:
        return distances, nearest_poi_idx
    if want_count:
        return distances, near_count
    return distances

In [83]:
def calculate_distance_nearest_poi(gdf_f, nodes, edges, amenity_name, column_name,
                                   wght='length', get_nearest_poi=(False, 'poi_id_column'),
                                   count_pois=0, max_distance=(0,'distance_node')):
    """Attach to every node the network cost to its nearest poi.

    Defined unconditionally (it used to sit under `if test:`) because the cells
    that call it are not guarded by that flag.

    Parameters
    ----------
    gdf_f : frame of pois
        Must contain `column_name`, plus `get_nearest_poi[1]` and `max_distance[1]`
        when those options are active.
    nodes, edges : frames
        Network tables handed to aup.to_igraph; both are copied, not modified.
    amenity_name : str
        Suffix of the output column names.
    column_name : str
        Column of gdf_f handed to aup.get_seeds.
    wght : str, default 'length'
        Edge weight name handed to aup.to_igraph.
    get_nearest_poi : (bool, str)
        When the flag is true, adds 'nearest_{amenity_name}' holding the value of
        the named gdf_f column for the closest poi.
    count_pois : number, default 0
        When non-zero, adds '{count_pois}min_{amenity_name}' with the number of
        pois whose cost is <= count_pois.
        NOTE(review): the column is labelled "min" whatever `wght` is; with
        wght='length' the threshold would presumably be metres - confirm.
    max_distance : (number, str)
        When the number is > 0, pois whose named column exceeds it are dropped first.

    Returns
    -------
    Copy of `nodes` with the added columns and 'dist_{amenity_name}', without the
    rows whose distance is missing or infinite.
    """
    nodes = nodes.copy()
    edges = edges.copy()
    if max_distance[0] > 0:
        gdf_f = gdf_f.loc[gdf_f[max_distance[1]] <= max_distance[0]]

    g, weights, node_mapping = aup.to_igraph(nodes, edges, wght=wght) #convert to igraph to run the calculations
    col_weight = f'dist_{amenity_name}'
    seeds = aup.get_seeds(gdf_f, node_mapping, column_name)
    voronoi_assignment = aup.voronoi_cpu(g, weights, seeds)

    want_nearest = bool(get_nearest_poi[0])
    want_count = count_pois != 0

    # One call covers every option; its return arity depends on the two flags.
    result = get_distances(g, seeds, weights, voronoi_assignment,
                           get_nearest_poi=want_nearest, count_pois=count_pois)
    if want_nearest and want_count:
        distances, nearest_poi_idx, near_count = result
    elif want_nearest:
        distances, nearest_poi_idx = result
    elif want_count:
        distances, near_count = result
    else:
        distances = result

    # Column order is the same as before: count, nearest poi, distance.
    if want_count:
        nodes[f'{count_pois}min_{amenity_name}'] = near_count
    if want_nearest:
        # Positional lookup on the id column in one step, instead of building a
        # row Series with .iloc for every node.
        poi_ids = gdf_f[get_nearest_poi[1]].to_numpy()
        nodes[f'nearest_{amenity_name}'] = poi_ids[np.asarray(nearest_poi_idx, dtype=int)]
    nodes[col_weight] = distances

    # Infinite values become NaN and those rows are dropped.
    nodes = nodes.replace([np.inf, -np.inf], np.nan)
    nodes = nodes[pd.notnull(nodes[col_weight])].copy()

    return nodes

### 04 - Resultado de las funciones redefinidas

In [88]:
# Test 1: return distance only (no nearest poi, no poi count)
nodes_distance_prep_01 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure)

# Show
print(nodes_distance_prep_01.shape)
nodes_distance_prep_01.head(1)

  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))


(51371, 6)


Unnamed: 0_level_0,index,x,y,street_count,geometry,dist_denue_preescolar
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
272921360,0,-102.295073,21.872876,3,POINT (-102.29507 21.87288),11.618624


In [89]:
# Test 2: return distance and nearest_poi (value of the poi's 'osmid' column)
nodes_distance_prep_02 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                     get_nearest_poi=(True, 'osmid'))

# Show
print(nodes_distance_prep_02.shape)
nodes_distance_prep_02.head(1)

  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))


(51371, 7)


Unnamed: 0_level_0,index,x,y,street_count,geometry,nearest_denue_preescolar,dist_denue_preescolar
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
272921360,0,-102.295073,21.872876,3,POINT (-102.29507 21.87288),936100055,11.618624


In [90]:
# Test 3: return distance and count amenities at 15 mins
# Stored under its own name so the result of the previous test is not overwritten.
nodes_distance_prep_15min = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                        count_pois = 15)

# Show
print(nodes_distance_prep_15min.shape)
nodes_distance_prep_15min.head(1)

  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))


(51371, 7)


Unnamed: 0_level_0,index,x,y,street_count,geometry,15min_denue_preescolar,dist_denue_preescolar
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
272921360,0,-102.295073,21.872876,3,POINT (-102.29507 21.87288),1,11.618624


In [91]:
# Test 4: return distance and count amenities at 30 mins
# Stored under its own name so the results of the earlier tests are not overwritten.
nodes_distance_prep_30min = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                        count_pois = 30)

# Show
print(nodes_distance_prep_30min.shape)
nodes_distance_prep_30min.head(1)

  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))


(51371, 7)


Unnamed: 0_level_0,index,x,y,street_count,geometry,30min_denue_preescolar,dist_denue_preescolar
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
272921360,0,-102.295073,21.872876,3,POINT (-102.29507 21.87288),5,11.618624


In [92]:
# Test 5: return distance, nearest poi and count amenities at 45 mins
# max_distance=(0, ...) leaves the poi filter disabled (it only applies when the number is > 0).
nodes_distance_prep_03 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                     get_nearest_poi=(True, 'osmid'),
                                                     count_pois = 45,
                                                     max_distance=(0,'distance_node'))

# Show
print(nodes_distance_prep_03.shape)
nodes_distance_prep_03.head(1)

  shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))


(51371, 8)


Unnamed: 0_level_0,index,x,y,street_count,geometry,45min_denue_preescolar,nearest_denue_preescolar,dist_denue_preescolar
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
272921360,0,-102.295073,21.872876,3,POINT (-102.29507 21.87288),9,936100055,11.618624


## TERMINA DESGLOSE DE FUNCIONES aup.calculate_distance_nearest_poi & aup.get_distances
## DESGLOSE DE LA FUNCIÓN aup.pois_time

In [95]:
# Next steps:

# Continue with aup.pois_time for what happens after the batches. Add up the amenities found.

# OPEN QUESTION ABOUT WHAT HAPPENS WHEN 'length' IS USED
# QUESTION (EDGAR): When 'length' is used instead of 'time_min', 'time_min' is computed assuming 4 km/h after the count of amenities within x minutes has already been made. (The count actually receives metres, not minutes.)
# Could a 'time_min' column be generated in 'edges' so that aup.calculate_distance_nearest_poi is always run with 'time_min'?
# Option b: Add a guard: if prox_measure == 'length', do not allow returning the number of nearby amenities.

# Finish and update the function aup.pois_time. Continue with the rest of the script (mean time in hexagons).

# FINISHES FUNCTIONS REDEVELOPMENT