## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

  ox.config(


## Config and definitions

In [19]:
city = 'Aguascalientes'
version = 1
prox_measure = 'length'

# BASE DATA REQUIRED
# Area of interest (city)
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2015' #'metro_gdf_2015' or 'metro_gdf_2020'
# Network data (nodes and edges table for distance analysis,
# also used to generate the network G with which the nearest OSMID is assigned to each poi)
network_schema = 'osmnx'
nodes_table = 'nodes' #'nodes' or 'nodes_23_point'
edges_table = 'edges_speed' ################################# PENDIENTE
# Points of interest - DENUE
denue_schema = 'denue'
denue_table = 'denue_2020' #'denue_2020' or 'denue_23_point'
# Points of interest - CLUES
clues_schema = 'denue'
clues_table = 'clues' #'clues' or 'clues_23_point'
# Points of interest - SIP
sip_schema = 'denue'
sip_table = 'sip_2020' #'sip_2020' or 'sip_23_point'
# Hexgrid
hex_schema = 'hexgrid'
# Population data
pop_schema = 'censo'
pop_table = 'hex_bins_pop_2020' ################################# PENDIENTE

In [3]:
parameters = {'Escuelas':{'Preescolar':{'denue_preescolar':[611111, 611112]}}}
#                          'Primaria':{'denue_primaria':[611121, 611122]},
#                          'Secundaria':{'denue_secundaria':[611131, 611132]}
#                         }
#             }

source_weight = {'Escuelas':{'Preescolar':'max'}}, #There is only one source, no effect.
#                             'Primaria':'max',  #There is only one source, no effect.
#                             'Secundaria':'max'
#                            },  #There is only one source, no effect.
#                }

In [4]:
def get_denue_pois(denue_schema,denue_table,poly_wkt,code,version):
    # This function downloads the codigo_act denue poi requested for the analysis.
    # If it is version 2.0, applies a filter to certain pois.

    # Download denue pois
    query = f"SELECT * FROM {denue_schema}.{denue_table} WHERE (ST_Intersects(geometry, \'SRID=4326;{poly_wkt}\')) AND (\"codigo_act\" = \'{code}\')"
    code_pois = aup.gdf_from_query(query, geometry_col='geometry')

     # Format denue pois
    code_pois = code_pois[['codigo_act', 'geometry']]
    code_pois = code_pois.rename(columns={'codigo_act':'code'})
    code_pois['code'] = code_pois['code'].astype('int64')
    
    return code_pois

## Script 21 (Simplified)

In [5]:
# Download area of interest
query = f"SELECT * FROM {metro_schema}.{metro_table} WHERE \"city\" LIKE \'{city}\'"
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')
mun_gdf = mun_gdf.set_crs("EPSG:4326")
aoi = mun_gdf.dissolve()

In [14]:
# Download Network used to calculate nearest note to each poi
G, nodes, edges_speed = aup.graph_from_hippo(aoi, schema=network_schema, edges_folder='edges_speed', nodes_folder=nodes_table)

# Show
print(edges_speed.shape)
edges_speed.head(1)

(121037, 19)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,oneway,lanes,name,highway,length,geometry,grade,grade_abs,access,tunnel,ref,maxspeed,bridge,junction,service,width,walkspeed,time_min
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3003502781,8424128014,0,296556921,True,1,Carretera Aguascalietes-San Marcos,primary,17.585,"LINESTRING (-102.20343 21.99742, -102.20327 21...",0.0,0.0,,,MEX 25,80,,,,,4.0,0.263775


In [16]:
# Download Network used to calculate nearest note to each poi
G, nodes, edges = aup.graph_from_hippo(aoi, schema=network_schema, edges_folder='edges', nodes_folder=nodes_table)

# Show
print(edges.shape)
edges.head(1)

(121067, 15)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,oneway,lanes,name,highway,maxspeed,length,geometry,bridge,ref,junction,tunnel,access,width,service
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
807059500,8547877099,0,66751891,False,2,Alfonso Esparza Oteo,tertiary,,6.828,"LINESTRING (-102.30495 21.87976, -102.30501 21...",,,,,,,


### Data 01 - aup.pois_time (Sets speed at the end)

In [20]:
poly_wkt = aoi.dissolve().geometry.to_wkt()[0]

i = 0
source_list = []

for eje in parameters.keys():
    for amenity in parameters[eje]:
        for source in parameters[eje][amenity]:

            source_list.append(source)
            # ANALYSIS - Select source points of interest
            source_pois = gpd.GeoDataFrame()
            for code in parameters[eje][amenity][source]:
                #If source is denue:
                if source[0] == 'd':
                    aup.log(f'--- Downloading denue source pois code {code} from db.')
                    code_pois = get_denue_pois(denue_schema,denue_table,poly_wkt,code,version)
                #If source is clues or sip:
                elif source[0] == 'c' or source[0] == 's':
                    aup.log(f'--- Getting clues/sip source pois code {code} from previously downloaded.')
                    code_pois = sip_clues_gdf.loc[sip_clues_gdf['code'] == code]
                else:
                    aup.log(f'--- Error, check parameters dicctionary.')
                    aup.log(f'--- Sources must start with denue_, clues_ or sip_.')
                    intended_crash
                    
                source_pois = pd.concat([source_pois,code_pois])

            # ANALYSIS - Calculate times from nodes to source
            source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure)
            source_nodes_time.rename(columns={'time':source},inplace=True)
            source_nodes_time = source_nodes_time[['osmid',source,'x','y','geometry']]

# Show
print(source_nodes_time.shape)
source_nodes_time.head(1)

Found and assigned nearest node osmid to each denue_preescolar.
Starting time analysis for denue_preescolar.
Starting range k = 1 of 2 for denue_preescolar.
Starting range k = 2 of 2 for denue_preescolar.
Finished time analysis for denue_preescolar.
(51390, 5)


Unnamed: 0,osmid,denue_preescolar,x,y,geometry
0,272921360,10.557825,-102.295073,21.872876,POINT (-102.29507 21.87288)


### Data 02 - aup.pois_time (Sets speed at the the beggining)

In [44]:
def pois_time_2(G, nodes, edges, pois, poi_name, prox_measure):
##########################################################################################
    
    # Step 1: NEAREST. Finds and assigns nearest node OSMID to each point of interest.
    # Defines projection for downloaded data
    pois = pois.set_crs("EPSG:4326")
    nodes = nodes.set_crs("EPSG:4326")
    edges = edges.set_crs("EPSG:4326")
    
    # If prox_measure = 'length', calculates time_min assuming walking speed = 4km/hr
    if prox_measure == 'length':
        edges['time_min'] = (edges['length']*60)/4000
        
    # In case there are no amenities of the type in the city, prevents it from crashing if len = 0
    if len(pois) == 0:
        nodes_time = nodes.copy()
        
        # Time is set to np.nan
        nodes_time['time'] = np.nan
        print(f"0 {poi_name} found. Time set to 0.")
        
        # Format
        nodes_time['source'] = poi_name
        nodes_time.reset_index(inplace=True)
        nodes_time = nodes_time.set_crs("EPSG:4326")
        nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]
        
        return nodes_time
        
    else:
        ### Calculate nearest node for each DENUE point
        nearest = aup.find_nearest(G, nodes, pois, return_distance= True)
        nearest = nearest.set_crs("EPSG:4326")
        print(f"Found and assigned nearest node osmid to each {poi_name}.")
        ##########################################################################################
        # Step 2: DISTANCE NEAREST POI. Calculates distance from each node to its nearest point of interest.
        # --------------- 2.1 FORMAT NETWORK
        # Fill NANs with mean times
        edges['time_min'].fillna(edges['time_min'].mean(),inplace=True)
		
		# --------------- 2.2 ELEMENTS NEEDED OUTSIDE THE LOOP
		# The pois are divided by batches of 200 or 250 pois and analysed using the function calculate_distance_nearest_poi
		# nodes_analysis is a nodes (index reseted) used in the function.
        nodes_analysis = nodes.reset_index().copy()
		# df_temp: Each column will store a batch of procesed nodes.
        df_temp = nodes.copy()
		#nodes_distance: Minimum time/distance found in all batches will be added from df_min (within if/elif/else) 
		#				 to nodes_distance (output) keeping x,y and geometry data.
        nodes_distance = nodes.copy()
		
		# --------------- 2.3 PROCESSING DISTANCE
        print (f"Starting time analysis for {poi_name}.")
	
		# If possible, analyses by batches of 200 pois.
        if len(nearest) % 250:
            batch_size = len(nearest)/200
            for k in range(int(batch_size)+1):
                print(f"Starting range k = {k+1} of {int(batch_size)+1} for {poi_name}.")
                source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
                nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght='time_min')
		
				#A middle gdf is created whose columns will be the name of the poi and the batch number it belongs to
                df_int = pd.DataFrame()
                df_int['dist_'+str(k)+poi_name] = nodes_distance_prep['dist_'+poi_name]
				
				#The middle gdf is merged into the previously created temporary gdf to store the data by node, each batch in a column.
                df_temp = df_temp.merge(df_int, left_index=True, right_index=True)
		
			# Once finished, drop the non-distance values from the temporary gdf
            df_temp.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)
		
			#We apply the min function to find the minimum value. This value is sent to a new df_min
            df_min = pd.DataFrame()
            df_min['dist_'+poi_name] = df_temp.min(axis=1)
		
			#We merge df_min which contains the shortest distance to the POI with nodes_distance which will store all final data
            nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)
    
            # Copy
            nodes_time = nodes_distance.copy()
            nodes_time['time'] = nodes_time['dist_'+poi_name]
		
		# Else, analyses by batches of 250 pois
        else:
            batch_size = len(nearest)/250
            for k in range(int(batch_size)+1):
                print(f"Starting range k = {k+1} of {int(batch_size)+1} for source {poi_name}.")
                source_process = nearest.iloc[int(250*k):int(250*(1+k))].copy()
                nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght='time_min')
		
				#A middle gdf is created whose columns will be the name of the poi and the batch number it belongs to
                df_int = pd.DataFrame()
                df_int['dist_'+str(k)+poi_name] = nodes_distance_prep['dist_'+poi_name]
		
				#The middle gdf is merged into the previously created temporary gdf to store the data by node, each batch in a column.
                df_temp = df_temp.merge(df_int, left_index=True, right_index=True)
		
			# Once finished, drop the non-distance values from the temporary gdf
            df_temp.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)
		
			#We apply the min function to find the minimum value. This value is sent to a new df_min
            df_min = pd.DataFrame()
            df_min['dist_'+poi_name] = df_temp.min(axis=1)
		
			#We merge df_min which contains the shortest distance to the POI with nodes_distance which will store all final data
            nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)

            # Copy
            nodes_time = nodes_distance.copy()
            nodes_time['time'] = nodes_time['dist_'+poi_name]

        print(f"Finished time analysis for {poi_name}.")

		##########################################################################################
		# Step 3: FORMAT. Adds poi name (source), organices and filters data.
        nodes_time['source'] = poi_name
        nodes_time.reset_index(inplace=True)
        nodes_time = nodes_time.set_crs("EPSG:4326")
        nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]
		
        return nodes_time

In [45]:
poly_wkt = aoi.dissolve().geometry.to_wkt()[0]

i = 0
source_list = []

for eje in parameters.keys():
    for amenity in parameters[eje]:
        for source in parameters[eje][amenity]:

            source_list.append(source)
            # ANALYSIS - Select source points of interest
            source_pois = gpd.GeoDataFrame()
            for code in parameters[eje][amenity][source]:
                #If source is denue:
                if source[0] == 'd':
                    aup.log(f'--- Downloading denue source pois code {code} from db.')
                    code_pois = get_denue_pois(denue_schema,denue_table,poly_wkt,code,version)
                #If source is clues or sip:
                elif source[0] == 'c' or source[0] == 's':
                    aup.log(f'--- Getting clues/sip source pois code {code} from previously downloaded.')
                    code_pois = sip_clues_gdf.loc[sip_clues_gdf['code'] == code]
                else:
                    aup.log(f'--- Error, check parameters dicctionary.')
                    aup.log(f'--- Sources must start with denue_, clues_ or sip_.')
                    intended_crash
                    
                source_pois = pd.concat([source_pois,code_pois])

            # ANALYSIS - Calculate times from nodes to source
            source_nodes_time_2 = pois_time_2(G, nodes, edges, source_pois, source, prox_measure)
            source_nodes_time_2.rename(columns={'time':source},inplace=True)
            source_nodes_time_2 = source_nodes_time_2[['osmid',source,'x','y','geometry']]

# Show
print(source_nodes_time_2.shape)
source_nodes_time_2.head(1)

Found and assigned nearest node osmid to each denue_preescolar.
Starting time analysis for denue_preescolar.
Starting range k = 1 of 2 for denue_preescolar.
Starting range k = 2 of 2 for denue_preescolar.
Finished time analysis for denue_preescolar.
(51390, 5)


Unnamed: 0,osmid,denue_preescolar,x,y,geometry
0,272921360,10.557825,-102.295073,21.872876,POINT (-102.29507 21.87288)


In [59]:
original = source_nodes_time.copy()
prueba = source_nodes_time_2[['osmid','denue_preescolar']]
prueba.rename(columns={'denue_preescolar':'denue_preescolar_2'},inplace=True)

compare = pd.merge(original,prueba,on='osmid')

compare['diferencia'] = compare['denue_preescolar']-compare['denue_preescolar_2']

# Show
print(f'Promedio anterior: {compare.denue_preescolar.mean()}.')
print(f'Promedio nuevo: {compare.denue_preescolar_2.mean()}.')
print(f'Diferencia total en tiempos unitarios: {compare.diferencia.sum()}.')
print(compare.shape)
compare.head(1)

Promedio anterior: 32.06047105312317.
Promedio nuevo: 32.06047105312317.
Diferencia total en tiempos unitarios: -4.57492654870606e-11.
(51390, 7)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prueba.rename(columns={'denue_preescolar':'denue_preescolar_2'},inplace=True)


Unnamed: 0,osmid,denue_preescolar,x,y,geometry,denue_preescolar_2,diferencia
0,272921360,10.557825,-102.295073,21.872876,POINT (-102.29507 21.87288),10.557825,1.776357e-15
