# 98c - Script 21 - dev count pois function at requested time proximity

This notebook was used to modify Script 21 and the functions aup.pois_time, aup.calculate_distance_nearest_poi and aup.get_distances in order to add the argument count_pois. When requested (it is optional), the argument adds to the output the number of amenities reachable within a given time from each node (averaged per hexagon).

Example:
count_pois = (True,15) will return how many of each amenity are within 15 minutes, in the form of eje_15min and amenity_15min columns.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the repository root importable so the project-local `aup` package resolves.
module_path = os.path.abspath(os.path.join('../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
# Import unconditionally: when the path is already registered (e.g. set through
# PYTHONPATH or by another notebook sharing the kernel) the import must still run.
import aup

  ox.config(


In [2]:
test = False

## Required script 21 data

In [39]:
# City to analyse and proximity-analysis version (1 = 2020 version, 2 = 2024 version).
city = 'Aguascalientes'
version = 2

# The cultural sources and the rule used to combine their times depend on the version.
if version == 1: #Prox analysis 2020 version
    cultural_dicc = {'denue_cines':[512130],
                     'denue_museos':[712111, 712112]}
    cultural_weight =  'min' # Will choose min time to source because measuring access to nearest source, doesn't matter which.

elif version == 2: #Prox analysis 2024 version
    cultural_dicc = {'denue_cines':[512130],
                    'denue_museos':[712111, 712112],
                    'denue_bibliotecas':[519121,519122],
                    'denue_centrocultural':[711312]}
    cultural_weight =  'two-method'
else:
    aup.log("--- Error in specified proximity analysis version.")
    aup.log("--- Must pass integers 1 or 2.")
    # Stop with an explicit, descriptive error instead of relying on an undefined name.
    raise ValueError(f"Unsupported proximity analysis version {version!r}; must be 1 or 2.")

# ---------------------------- BASE DATA REQUIRED ----------------------------
# Schema/table names of every database source read by this notebook.
# Area of interest (city)
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2015' #'metro_gdf_2015' or 'metro_gdf_2020'
# Network data (nodes and edges tables for distance analysis,
# also used to generate the network G with which the nearest OSMID is assigned to each poi)
network_schema = 'osmnx'
nodes_table = 'nodes' #'nodes' or 'nodes_23_point'
edges_table = 'edges_speed' ################################# PENDING
# Points of interest - DENUE
denue_schema = 'denue'
denue_table = 'denue_2020' #'denue_2020' or 'denue_23_point'
# Points of interest - CLUES
clues_schema = 'denue'
clues_table = 'clues' #'clues' or 'clues_23_point'
# Points of interest - SIP
sip_schema = 'denue'
sip_table = 'sip_2020' #'sip_2020' or 'sip_23_point'
# Hexgrid
hex_schema = 'hexgrid'
# Population data
pop_schema = 'censo'
pop_table = 'hex_bins_pop_2020' ################################# PENDING

# ---------------------------- ANALYSIS AND OUTPUT OPTIONS ----------------------------
# Network distance method used in function pois_time. (If length, assumes pedestrian speed of 4km/hr.)
prox_measure = 'time_min' # Must pass 'length' or 'time_min'

# Count available amenities at given time proximity (minutes)?
count_pois = (True,15) # Must pass a tuple containing a boolean (True or False) and the time proximity of interest in minutes: (boolean, time)

# If pop_output = True, loads pop data from pop_schema and pop_table.
# If pop_output = False, loads empty hexgrid.
pop_output = False

# Hexagon resolutions of output
# If pop_output == True, allows res 8 only.
# If pop_output == False and version == 1, allows res 8 and 9 only (res 10 is available, but doesn't have a 'metropolis' or 'city' column).
# If pop_output == False and version == 2, allows res 8, 9, 10 and 11 only.
# NOTE(review): the list below also contains 7 and 12, which the rules above do not allow - confirm whether that is intentional.
res_list = [7,8,9,10,11,12] 

# SAVING
# Save final output to db?
save = False
save_schema = 'prox_analysis'
save_table = 'proximityanalysis_24_ageb_hex'
# Local save? (Runs Aguascalientes for tests)
local_save = False
local_save_dir = '../../data/external/temporal_fromjupyter/proximity_v2/test_proxanalysis_scriptv2_poisproxcount.gpkg'

In [40]:
####################################################################################################################################
# Simplified dictionaries for notebook testing.
####################################################################################################################################

# Nested as {eje: {amenity: {source: [codes]}}}. The source-name prefix (denue_/sip_)
# is what the download loop further down uses to decide where the codes come from.
# The 'Cultural' sources are taken from cultural_dicc, which depends on `version`.
parameters = {'Entretenimiento':{'Social':{'denue_restaurante_insitu':[722511, 722512, 722513, 722514, 722519],
                                            'denue_restaurante_llevar':[722516, 722518, 722517],
                                            'denue_bares':[722412],
                                            'denue_cafe':[722515]},
                                    'Actividad física':{'sip_cancha':[93110],
                                                        'sip_unidad_deportiva':[93111],
                                                        'sip_espacio_publico':[9321],
                                                        'denue_parque_natural':[712190]},
                                    'Cultural':cultural_dicc
                                } 
             }

# Rule used to combine the times of the sources of each amenity; same eje/amenity keys as `parameters`.
source_weight = {'Entretenimiento':{'Social':'max', # Will choose max time to source because measuring access to all of them.
                                    'Actividad física':'min', # Will choose min time to source because measuring access to nearest source, doesn't matter which.
                                    'Cultural':cultural_weight} # Depends on version.
                    }

In [5]:
####################################################################################################################################
# Simplified version, for centro cultural only. Full code filters dif (Not used in this notebook test).
####################################################################################################################################
def _name_matches_any(names, keywords):
    """Return a boolean Series that is True where a name contains any of the keywords.

    Matching is a plain, case-sensitive substring search. Missing names (None/NaN)
    never match, so they cannot break the boolean mask built from the result.
    """
    matches = pd.Series(False, index=names.index)
    if names.empty:
        return matches
    for keyword in keywords:
        matches |= names.str.contains(keyword, regex=False, na=False)
    return matches


def get_denue_pois(denue_schema,denue_table,poly_wkt,code,version):
    """Download the DENUE points of interest of one activity code inside a polygon.

    In version 2 the pois of codes 931610 (denue_dif) and 711312
    (denue_centro_cultural) are additionally filtered by establishment name.

    Args:
        denue_schema: Database schema holding the DENUE table.
        denue_table: Name of the DENUE table.
        poly_wkt: WKT of the area of interest (interpreted as SRID 4326 in the query).
        code: Activity code ("codigo_act") to download.
        version: Proximity analysis version, 1 or 2.

    Returns:
        The frame returned by aup.gdf_from_query reduced to the columns
        'code' (int64) and 'geometry'.

    Raises:
        ValueError: If version is neither 1 nor 2.
    """
    # Validate first so an invalid version never costs a database round trip.
    if version not in (1, 2):
        aup.log("--- Error in specified proximity analysis version.")
        aup.log("--- Must pass integers 1 or 2.")
        raise ValueError(f"Unsupported proximity analysis version {version!r}; must be 1 or 2.")

    # Download denue pois
    # NOTE(review): the query is built by string interpolation; every value must come
    # from trusted notebook configuration, never from user input.
    query = f"SELECT * FROM {denue_schema}.{denue_table} WHERE (ST_Intersects(geometry, 'SRID=4326;{poly_wkt}')) AND (\"codigo_act\" = '{code}')"
    code_pois = aup.gdf_from_query(query, geometry_col='geometry')

    # Version 2.0 pois filter
    if version == 1:
        aup.log("--- No filter applied.")

    elif code == 931610: #denue_dif
        aup.log(f"--- Applying filtering to code {code}.")
        # Establishments whose name contains any of these words are discarded.
        words_toavoid = [# Cultural
                        'ARTE', # also matches ARTES, CONARTE
                        'MEDIATECA', 'MUSICA','ORQUESTA', # also matches MUSICAL, ORQUESTAS
                        # Institutions
                        'CONAFE','CONACYT', # original note says "includes CULTURAL" - TODO confirm intent
                        'TRIBUNAL','PROTECCION CIVIL','IMM',
                        # Health
                        'IMMS','ISSTE','INAPAM','SEGURO','POPULAR','FOVI', # also matches FOVISSTE, FOVILEON, etc.
                        'CAPASITIS', # Outpatient centre for prevention and care of AIDS and sexually transmitted infections
                        'SANITARI', # SANITARIO/SANITARIA
                        'MEDIC', # MEDICO/MEDICA
                        # Education
                        'INEA','PRIMARIA','SECUNDARIA','PREPARATORIA','MAESTROS','BECA','ASESORIA','APOYO',
                        'USAER', # Unidad de Servicio de Apoyo a la Educación Regular
                        'EDUCA', # EDUCACION, EDUCACIÓN, EDUCATIVO, EDUCATIVA
                        # Housing
                        'VIVIENDA','INFONAVIT',
                        # Offices
                        'COORDINA','CORDINA', # also matches COORDINACION and typos (CORDINACION)
                        'DIRECCION','DIVISION','INSPECCION','INSTITUTO','JEFATURA','JURISDICCION','OFICINA','PROGRAMA','PROCURADORIA','PROCURADURIA',
                        'RECAUDACION','PAPELERIA','REGION ','REGULACION','SECRETARIA','DELEGACION','SUPERVI',
                        'ADMINISTRA', # ADMINISTRATIVO, ADMINISTRATIVA
                        'ANALISIS', 'SEGUIMIENTO','MICRORED','MICRO RED',
                        # Warehouses and storage
                        'ALMACEN','BODEGA','ARCHIVO','ACTIVO',
                        'PROVEED', # PROVEEDOR, PROVEEDORA
                        # Others
                        'JUNTA', # for "juntas de mejoras"
                        'POLIVALENTE',
                        'SERVICIO',
                        'GIMNASIO']
        unwanted = _name_matches_any(code_pois['nom_estab'], words_toavoid)
        # .loc with a mask returns a new frame, so dropping duplicates needs no inplace edit of a slice.
        code_pois = code_pois.loc[~unwanted].drop_duplicates()

    elif code == 711312: #denue_centro_cultural
        aup.log(f"--- Applying filtering to code {code}.")
        # Only establishments whose name contains at least one of these words are kept.
        amenities_ofinterest = ['CENTRO',
                                'CULTURA', # also matches CULTURAL
                                'LIENZO',
                                'PLAZA',
                                'ARENA',
                                'AUDITORIO',
                                'TEATRO',
                                'ARTE', # also matches ARTES
                                'MUSEO']
        wanted = _name_matches_any(code_pois['nom_estab'], amenities_ofinterest)
        # A single mask keeps each poi once, even when its name matches several words.
        code_pois = code_pois.loc[wanted].drop_duplicates()

    else:
        aup.log("--- No filter applied.")

    # Format denue pois
    code_pois = code_pois[['codigo_act', 'geometry']].rename(columns={'codigo_act':'code'})
    code_pois['code'] = code_pois['code'].astype('int64')

    return code_pois

In [42]:
def two_method_check(row):
    """Pick the time stored for a cultural amenity using the 'two-method' rule.

    Background (from the original notes): version 2 added 'Bibliotecas', a source
    with plenty of pois that could dilute the other cultural sources.

    Reads two names that are NOT parameters and must exist at module level when
    the function runs: `check_lst` (names of the '<source>_check' flag columns)
    and `a` (amenity name; its lower-cased form names the output entry).

    Rule:
        * row['check_count'] > 1: take the MAX time among the sources whose
          '<source>_check' flag equals 1 (per the original notes, sources
          within 15 minutes).
        * otherwise (0 or 1 close sources): take the MIN time among the sources
          whose flag equals 0, i.e. a single close source is ignored and the
          next closest one is used.

    Returns the same row with the entry 'max_<a.lower()>' set.
    """
    several_sources_close = row['check_count'] > 1
    # Flag value that selects the sources taking part in the aggregation.
    selected_flag = 1 if several_sources_close else 0

    chosen_sources = [
        flag_column.replace('_check','')
        for flag_column in check_lst
        if row[flag_column] == selected_flag
    ]

    chosen_times = row[chosen_sources]
    if several_sources_close:
        aggregated_time = chosen_times.max()
    else:
        aggregated_time = chosen_times.min()

    row['max_'+a.lower()] = aggregated_time
    return row

## Script 21

In [6]:
############################################################### PART 1 ###############################################################
#################################################### FIND NODES PROXIMITY TO POIS ####################################################
###################################################### (PREV. SCRIPT 01 + 02) ########################################################

# 1.1 --------------- BASE DATA FOR POIS-NODES ANALYSIS
# ------------------- This first step downloads the area of interest and network used to measure distance.

# Download area of interest: every row of the metro table whose "city" matches `city`,
# dissolved into a single geometry (aoi).
query = f"SELECT * FROM {metro_schema}.{metro_table} WHERE \"city\" LIKE \'{city}\'"
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')
mun_gdf = mun_gdf.set_crs("EPSG:4326")
aoi = mun_gdf.dissolve()

# Download the network used to calculate the nearest node to each poi
G, nodes, edges = aup.graph_from_hippo(aoi, schema=network_schema, edges_folder=edges_table, nodes_folder=nodes_table)

# Show
print(nodes.shape)
print(edges.shape)

(51434, 4)
(121037, 19)


In [7]:
# 1.2 --------------- DOWNLOAD POINTS OF INTEREST (clues and sip pois, not denue)
# ------------------- This step downloads SIP and CLUES points of interest (denue pois are downloaded later.)
# Result: sip_clues_gdf with the columns 'code' and 'geometry'.

# CLUES (Salud)
aup.log(f"--- Downloading CLUES pois for {city}.")
# Download
clues_gdf = aup.gdf_from_polygon(aoi, clues_schema, clues_table, geom_col="geometry")
# Filter. The explicit copy makes the column assignment below act on an independent
# frame instead of a slice (which triggered SettingWithCopyWarning).
clues_pois = clues_gdf.loc[clues_gdf['nivel_atencion'] == 'PRIMER NIVEL'].copy()
del clues_gdf
# Format
clues_pois['code'] = 8610
clues_pois = clues_pois[['code','geometry']]

# SIP (Marco geoestadistico)
aup.log(f"--- Downloading SIP pois for {city}.")
# Download
sip_gdf = aup.gdf_from_polygon(aoi, sip_schema, sip_table, geom_col="geometry")
# Column of the SIP table -> values of that column that are of interest
sip_amenities = {'GEOGRAFICO':['Mercado','Plaza'], 
                 'TIPO':['Cancha','Unidad Deportiva','Áreas Verdes','Jardín','Parque']}
# Filter - SIP pois of interest and the code assigned to each
sip_amenities_codes = {'Mercado':4721, #sip_mercado
                       'Cancha':93110, #sip_cancha
                       'Unidad Deportiva':93111, #sip_unidad_deportiva 
                       'Áreas Verdes':9321, #sip_espacio_publico 
                       'Jardín':9321, #sip_espacio_publico
                       'Parque':9321, #sip_espacio_publico
                       'Plaza':9321 #sip_espacio_publico
                        }
# Filter - Iterate over sip_amenities and filter sip gdf.
# Pieces are collected in a list and concatenated once instead of growing a frame inside the loop.
sip_frames = []
for col, amenities in sip_amenities.items():
    for amenity in amenities:
        sip_tmp = sip_gdf.loc[sip_gdf[col] == amenity].copy()
        sip_tmp['code'] = sip_amenities_codes[amenity]
        # Format
        sip_frames.append(sip_tmp[['code','geometry']])
del sip_gdf
sip_pois = pd.concat(sip_frames)
del sip_frames

# Save both sources together
sip_clues_gdf = pd.concat([clues_pois,sip_pois])
del clues_pois
del sip_pois

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

# STARTS FUNCTIONS REDEVELOPMENT

In [8]:
if test:
    # WKT of the area of interest, used to clip the DENUE queries.
    poly_wkt = aoi.dissolve().geometry.to_wkt()[0]

    source_list = []

    # parameters is nested as {eje: {amenity: {source: [codes]}}}
    for eje in parameters.keys():
        for amenity in parameters[eje]:
            for source in parameters[eje][amenity]:

                source_list.append(source)
                # ANALYSIS - Select source points of interest
                code_frames = []
                for code in parameters[eje][amenity][source]:
                    # The source-name prefix decides where the pois come from.
                    # If source is denue:
                    if source.startswith('denue_'):
                        print(f'--- Downloading denue source pois code {code} from db.')
                        code_pois = get_denue_pois(denue_schema,denue_table,poly_wkt,code,version)
                    # If source is clues or sip:
                    elif source.startswith(('clues_','sip_')):
                        print(f'--- Getting clues/sip source pois code {code} from previously downloaded.')
                        code_pois = sip_clues_gdf.loc[sip_clues_gdf['code'] == code]
                    else:
                        print(f'--- Error, check parameters dicctionary.')
                        print(f'--- Sources must start with denue_, clues_ or sip_.')
                        # Explicit error instead of crashing through an undefined name.
                        raise ValueError(f"Unknown source prefix in {source!r}; must start with denue_, clues_ or sip_.")

                    code_frames.append(code_pois)

                # One concat per source instead of growing a frame code by code.
                source_pois = pd.concat(code_frames) if code_frames else gpd.GeoDataFrame()

                # ANALYSIS - Calculate times from nodes to source
                #source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure)

    # Show (pois of the last source processed). A bare expression inside the `if`
    # displays nothing, so the preview is printed explicitly.
    print(source_pois.shape)
    print(source_pois.head(1))

## DESGLOSE DE LA FUNCIÓN aup.pois_time

In [9]:
### BREAKDOWN OF THE FUNCTION aup.pois_time

if test:
    # Goal:
    #source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure)
    
    # Function used:
    #def pois_time(G, nodes, edges, pois, poi_name, prox_measure):
    
    # Base data needed: rebuild the function's arguments as notebook globals,
    # using the last source processed by the previous cell.
    pois = source_pois.copy()
    poi_name = source
    # Re-assigns the count_pois option already set in the configuration cell.
    count_pois = (True,15)

## DESGLOSE DE LAS FUNCIONES aup.calculate_distance_nearest_poi & aup.get_distances

### 00 - Llegar a la función de interés

In [10]:
### BREAKDOWN OF aup.pois_time up to the call to calculate_distance_nearest_poi

if test:
    ##########################################################################################
    # Step 1: NEAREST. Finds and assigns nearest node OSMID to each point of interest.

    # Defines projection for downloaded data
    pois = pois.set_crs("EPSG:4326")
    nodes = nodes.set_crs("EPSG:4326")
    edges = edges.set_crs("EPSG:4326")

    # In case there are no amenities of the type in the city, prevents it from crashing if len = 0
    if len(pois) == 0:
        nodes_time = nodes.copy()
        # Time is set to np.nan (the message now says so; it used to claim 0)
        nodes_time['time'] = np.nan
        print(f"0 {poi_name} found. Time set to nan.")
        # Format
        nodes_time['source'] = poi_name
        nodes_time.reset_index(inplace=True)
        nodes_time = nodes_time.set_crs("EPSG:4326")
        nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]
        #return nodes_time

    else:
        ### Calculate nearest node for each DENUE point
        nearest = aup.find_nearest(G, nodes, pois, return_distance= True)
        nearest = nearest.set_crs("EPSG:4326")
        print(f"Found and assigned nearest node osmid to each {poi_name}.")

        ##########################################################################################
        # Step 2: DISTANCE NEAREST POI. Calculates distance from each node to its nearest point of interest.

        # --------------- 2.1 FORMAT NETWORK
        # Fill NANs with mean times. Assigning the result back avoids the chained
        # in-place fillna, which does not reliably update the frame under pandas copy-on-write.
        edges[prox_measure] = edges[prox_measure].fillna(edges[prox_measure].mean())

        # --------------- 2.2 ELEMENTS NEEDED OUTSIDE THE LOOP
        # The pois are divided by batches of 200 or 250 pois and analysed using the function calculate_distance_nearest_poi
        # nodes_analysis is a nodes (index reseted) used in the function.
        nodes_analysis = nodes.reset_index().copy()
        # df_temp: Each column will store a batch of procesed nodes.
        df_temp = nodes.copy()
        # nodes_distance: Minimum time/distance found in all batches will be added from df_min (within if/elif/else)
        #                 to nodes_distance (output) keeping x,y and geometry data.
        nodes_distance = nodes.copy()

        # --------------- 2.3 PROCESSING DISTANCE
        print (f"Starting time analysis for {poi_name}.")

        # If possible, analyses by batches of 200 pois.
        # NOTE(review): only the 200-poi branch is reproduced in this cell; when
        # len(nearest) is a multiple of 250 nothing is processed here.
        if len(nearest) % 250:
            batch_size = len(nearest)/200
            for k in range(int(batch_size)+1):
                print(f"Starting range k = {k+1} of {int(batch_size)+1} for {poi_name}.")
                source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
                #nodes_distance_prep = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure)
            # Size of the last batch. Kept inside the branch that defines
            # source_process so it cannot raise NameError when the branch is skipped.
            print(len(source_process))

### 01 - Exploración del código

In [11]:
# BREAKDOWN OF THE FUNCTION aup.calculate_distance_nearest_poi

if test:
    # Goal:
    #nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure, get_nearest_poi=(False, 'osmid'))
    
    # Function used:
    # def calculate_distance_nearest_poi(gdf_f, nodes, edges, amenity_name, column_name, wght='length', get_nearest_poi=(False, 'poi_id_column'), max_distance=(0,'distance_node'))
    
    # Base data needed: rebuild the function's arguments as notebook globals.
    gdf_f = source_process.copy()
    # NOTE(review): this rebinds the global `nodes` to the index-reset copy; later
    # cells that use `nodes` will see this version - confirm that is intended.
    nodes = nodes_analysis.copy()
    amenity_name = poi_name
    column_name = 'osmid'
    wght = prox_measure
    get_nearest_poi=(True, 'osmid')
    max_distance=(0,'distance_node')

In [12]:
# BREAKDOWN OF THE FUNCTION aup.calculate_distance_nearest_poi

if test:
    # --- Required processing
    # (Indentation normalized to 4 spaces; the code lines used tabs while the
    #  comments used spaces, which breaks as soon as a space-indented statement is added.)
    nodes = nodes.copy()
    edges = edges.copy()
    # A max distance of 0 means "no limit"; otherwise drop pois beyond it.
    if max_distance[0] > 0:
        gdf_f = gdf_f.loc[gdf_f[max_distance[1]]<=max_distance[0]]
    g, weights, node_mapping = aup.to_igraph(nodes,edges,wght=wght) #convert to igraph to run the calculations
    seeds = aup.get_seeds(gdf_f, node_mapping, column_name)
    voronoi_assignment = aup.voronoi_cpu(g, weights, seeds)

    #if get_nearest_poi[0]: # Return distances and nearest poi idx
        #distances, nearest_poi_idx = get_distances(g,seeds,weights,voronoi_assignment,get_nearest_poi=True)

In [13]:
# BREAKDOWN OF THE FUNCTION aup.get_distances

if test:
    # Goal:
    # distances, nearest_poi_idx = get_distances(g,seeds,weights,voronoi_assignment,get_nearest_poi=True)
    
    # Function used:
    # def get_distances(g, seeds, weights, voronoi_assignment, get_nearest_poi=False):
    
    # Base data needed:
    get_nearest_poi=True
    # Matrix of shortest-path costs computed from the seeds (one row per seed).
    shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))
    # For each column i, the smallest cost over all seeds.
    distances = [np.min(shortest_paths[:,i]) for i in range(len(voronoi_assignment))]

In [14]:
if test:
    # Matrix of interest. Contains the times.
    # 51434 elements in row 1
    # 95 elements in column 1

    # Show
    print(shortest_paths)

In [15]:
if test:
    # Example of the expression used to find the nearest amenity
    # nearest_poi_idx = [np.argmin(shortest_paths[:,i]) for i in range(len(voronoi_assignment))]
    
    # Show the length of one column of the matrix
    print(len(shortest_paths[:,0]))

In [16]:
if test:
    # Number of entries in voronoi_assignment (the range iterated when building `distances`).
    print(len(voronoi_assignment))

### 02 - Desarrollo del cambio a realizar para que cuente las amenidades a x tiempo (e.g. 15 mins)

In [17]:
### TEST OF THE METHOD FOR THE FUNCTION get_distances

if test:
    # Base data: toy matrix with 3 rows and 2 columns
    data = np.array([[10, 20],
                     [20, 83],
                     [1, 15]])
    
    # Test 1 (failed)
    #prueba = np.where( data <= 15 )
    # Test 2 (Success): per column, count how many values are <= 15
    prueba = [len(np.where(data[:,i] <= 15)[0]) for i in range(2)] #in range of number of columns (in function, nodes available (voronoi_assignment))
    # Show
    print(prueba)

### 03 - Prueba de redefinición de las funciones

In [18]:
if test:
    def get_distances(g, seeds, weights, voronoi_assignment, get_nearest_poi=False, count_pois = (False,0)):
        """Distance from every node to its nearest seed, plus optional extras.

        Args:
            g: Graph exposing shortest_paths_dijkstra(seeds, weights=...).
            seeds: Source vertices the shortest paths are computed from.
            weights: Edge weights passed to the shortest-path computation.
            voronoi_assignment: Only its length is used; it sets how many
                columns (nodes) are reported.
            get_nearest_poi: If True, also return, per node, the row index of
                the seed with the smallest cost.
            count_pois: Tuple (flag, threshold). If flag is truthy, also
                return, per node, how many seeds have a cost <= threshold.

        Returns:
            distances                                   (no extras)
            (distances, nearest_poi_idx)                (get_nearest_poi only)
            (distances, near_count)                     (count_pois only)
            (distances, nearest_poi_idx, near_count)    (both)
            Every element is a list with one entry per node.
        """
        # One row per seed, one column per node.
        shortest_paths = np.array(g.shortest_paths_dijkstra(seeds,weights=weights))
        # Same columns the per-node loops used to visit: the first len(voronoi_assignment).
        paths = shortest_paths[:, :len(voronoi_assignment)]

        # Column-wise reductions replace one Python-level pass per node.
        distances = paths.min(axis=0).tolist()

        if get_nearest_poi:
            nearest_poi_idx = paths.argmin(axis=0).tolist()

        if count_pois[0]:
            near_count = (paths <= count_pois[1]).sum(axis=0).tolist()

        # Return options
        if get_nearest_poi and count_pois[0]:
            return distances, nearest_poi_idx, near_count
        elif get_nearest_poi:
            return distances, nearest_poi_idx
        elif count_pois[0]:
            return distances, near_count
        else:
            return distances

In [19]:
if test:
    def calculate_distance_nearest_poi(gdf_f, nodes, edges, amenity_name, column_name, 
    wght='length', get_nearest_poi=(False, 'poi_id_column'),count_pois=(False,0), max_distance=(0,'distance_node')):
        """Attach to each node the network distance to its nearest point of interest.

        Args:
            gdf_f: Points of interest; must hold `column_name` (used to build the seeds).
            nodes, edges: Network tables; both are copied, the inputs are not modified.
            amenity_name: Suffix used to name the output columns.
            column_name: Column of gdf_f passed to aup.get_seeds.
            wght: Edge attribute used as weight when building the igraph graph.
            get_nearest_poi: (flag, column). If flag is truthy, adds
                'nearest_<amenity_name>' with the value of `column` of the nearest poi.
            count_pois: (flag, threshold). If flag is truthy, adds
                '<threshold>min_<amenity_name>' with the count returned by get_distances.
            max_distance: (limit, column). If limit > 0, pois whose `column`
                exceeds the limit are discarded first.

        Returns:
            Copy of nodes with 'dist_<amenity_name>' (plus the optional columns);
            rows whose distance is infinite or missing are dropped.
        """
        nodes = nodes.copy()
        edges = edges.copy()

        # Optional pre-filter of the pois by their distance column.
        if max_distance[0] > 0:
            within_limit = gdf_f[max_distance[1]] <= max_distance[0]
            gdf_f = gdf_f.loc[within_limit]

        # convert to igraph to run the calculations
        graph, edge_weights, node_mapping = aup.to_igraph(nodes, edges, wght=wght)
        distance_column = f'dist_{amenity_name}'
        seeds = aup.get_seeds(gdf_f, node_mapping, column_name)
        voronoi_assignment = aup.voronoi_cpu(graph, edge_weights, seeds)

        want_nearest = bool(get_nearest_poi[0])
        want_count = bool(count_pois[0])

        # Forward only the options that were requested, exactly as each branch used to.
        requested = {}
        if want_nearest:
            requested['get_nearest_poi'] = True
        if want_count:
            requested['count_pois'] = count_pois
        outcome = get_distances(graph, seeds, edge_weights, voronoi_assignment, **requested)

        # get_distances returns a bare list or a tuple depending on the options.
        if want_nearest and want_count:
            distances, nearest_poi_idx, near_count = outcome
        elif want_nearest:
            distances, nearest_poi_idx = outcome
        elif want_count:
            distances, near_count = outcome
        else:
            distances = outcome

        # Column order: count, nearest, distance.
        if want_count:
            nodes[f'{count_pois[1]}min_{amenity_name}'] = near_count
        if want_nearest:
            id_column = get_nearest_poi[1]
            nodes[f'nearest_{amenity_name}'] = [gdf_f.iloc[position][id_column] for position in nearest_poi_idx]
        nodes[distance_column] = distances

        # Infinite distances become NaN, then rows without a distance are dropped.
        nodes = nodes.replace([np.inf, -np.inf], np.nan)
        has_distance = nodes[distance_column].notnull()
        return nodes[has_distance].copy()

### 04 - Resultado de las funciones redefinidas

In [20]:
if test:
    # Test 1: return distance only (no nearest poi, no pois count)
    nodes_distance_prep_01 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure)
    
    # Show
    print(nodes_distance_prep_01.shape)
    print(nodes_distance_prep_01.head(1))

In [21]:
if test:
    # Test 2: return distance and nearest_poi
    nodes_distance_prep_02 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                         get_nearest_poi=(True, 'osmid'))
    
    # Show
    print(nodes_distance_prep_02.shape)
    print(nodes_distance_prep_02.head(1))

In [22]:
if test:
    # Test 3: return distance and count amenities at 15 mins
    # NOTE(review): overwrites nodes_distance_prep_02 from Test 2 - confirm the name reuse is intended.
    nodes_distance_prep_02 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                         count_pois = (True,15))
    
    # Show
    print(nodes_distance_prep_02.shape)
    print(nodes_distance_prep_02.head(1))

In [23]:
if test:
    # Test 4: return distance and count amenities at 30 mins
    # NOTE(review): overwrites nodes_distance_prep_02 again - confirm the name reuse is intended.
    nodes_distance_prep_02 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                         count_pois = (True,30))
    
    # Show
    print(nodes_distance_prep_02.shape)
    print(nodes_distance_prep_02.head(1))

In [24]:
if test:
    # Test 5: return distance, nearest poi and count amenities at 45 mins
    # (max_distance is passed with its default value, i.e. no distance limit is applied)
    nodes_distance_prep_03 = calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                         get_nearest_poi=(True, 'osmid'),
                                                         count_pois = (True,45),
                                                         max_distance=(0,'distance_node'))
    
    # Show
    print(nodes_distance_prep_03.shape)
    print(nodes_distance_prep_03.head(1))

## CONTINUA DESGLOSE DE LA FUNCIÓN aup.pois_time

### 01 - Iteraciones

In [25]:
### DESGLOSE DE LA FUNCIÓN aup.pois_time (original modificada, version 1)

if test:
    ##########################################################################################
    # Step 1: NEAREST. Finds and assigns nearest node OSMID to each point of interest.
       
    # Defines projection for downloaded data
    pois = pois.set_crs("EPSG:4326")
    nodes = nodes.set_crs("EPSG:4326")
    edges = edges.set_crs("EPSG:4326")
    
    # In case there are no amenities of the type in the city, prevents it from crashing if len = 0
    if len(pois) == 0:
        nodes_time = nodes.copy()
        # Time is set to np.nan
        nodes_time['time'] = np.nan
        print(f"0 {poi_name} found. Time set to nan.")
        # If requested, pois_count is set to nan.
        if count_pois[0]:
            nodes_time['pois_count'] = np.nan
            print(f"0 {poi_name} found. Pois count set to nan.")
        
        # Format
        nodes_time['source'] = poi_name
        nodes_time.reset_index(inplace=True)
        nodes_time = nodes_time.set_crs("EPSG:4326")
        nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]
        #return nodes_time
    
    else:
        ### Calculate nearest node for each DENUE point
        nearest = aup.find_nearest(G, nodes, pois, return_distance= True)
        nearest = nearest.set_crs("EPSG:4326")
        print(f"Found and assigned nearest node osmid to each {poi_name}.")
    
        ##########################################################################################
        # Step 2: DISTANCE NEAREST POI. Calculates distance from each node to its nearest point of interest.
            
        # --------------- 2.1 FORMAT NETWORK
        # Fill NANs with mean times
        edges[prox_measure].fillna(edges[prox_measure].mean(),inplace=True)
        
        # --------------- 2.2 ELEMENTS NEEDED OUTSIDE THE LOOP
        # The pois are divided by batches of 200 or 250 pois and analysed using the function calculate_distance_nearest_poi
        # nodes_analysis is a nodes (index reseted) used in the function.
        nodes_analysis = nodes.reset_index().copy()
        # df_temp: Each column will store a batch of procesed nodes.
        df_temp = nodes.copy()
        if count_pois[0]:
            df_temp_2 = nodes.copy()
        #nodes_distance: Minimum time/distance found in all batches will be added from df_min (within if/elif/else) 
        #				 to nodes_distance (output) keeping x,y and geometry data.
        nodes_distance = nodes.copy()
        
        # --------------- 2.3 PROCESSING DISTANCE
        print (f"Starting time analysis for {poi_name}.")
        
        # If possible, analyses by batches of 200 pois.
        if len(nearest) % 250:
            batch_size = len(nearest)/200
            for k in range(int(batch_size)+1):
                print(f"Starting range k = {k+1} of {int(batch_size)+1} for {poi_name}.")
                source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
                nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                                         count_pois=count_pois)
                #A middle gdf is created whose columns will be the name of the poi and the batch number it belongs to
                df_int = pd.DataFrame()
                df_int['dist_'+str(k)+poi_name] = nodes_distance_prep['dist_'+poi_name]
                if count_pois[0]:
                    df_int_2 = pd.DataFrame()
                    df_int_2[f'{count_pois[1]}min_'+str(k)+poi_name] = nodes_distance_prep[f'{count_pois[1]}min_'+poi_name]
                    
                #The middle gdf is merged into the previously created temporary gdf to store the data by node, each batch in a column.
                df_temp = df_temp.merge(df_int, left_index=True, right_index=True)
                if count_pois[0]:
                    df_temp_2 = df_temp_2.merge(df_int_2, left_index=True, right_index=True)
                    
            # Once finished, drop the non-distance values from the temporary gdf
            df_temp.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)
            if count_pois[0]:
                df_temp_2.drop(['x', 'y', 'street_count','geometry'], inplace = True, axis=1)
            
            #We apply the min function to find the minimum value. This value is sent to a new df_min
            df_min = pd.DataFrame()
            df_min['dist_'+poi_name] = df_temp.min(axis=1)
            # For pois count, we apply the sum function to find the total value. This value is sent to a new df_sum
            if count_pois[0]:
                df_sum = pd.DataFrame()
                df_sum[f'{count_pois[1]}min_'+poi_name] = df_temp_2.sum(axis=1)
            
            #We merge df_min which contains the shortest distance to the POI with nodes_distance which will store all final data
            nodes_distance = nodes_distance.merge(df_min, left_index=True, right_index=True)
            # For pois count, we merge df_sum which contains the total pois found ad given a given time to the POI with nodes_distance which will store all final data
            if count_pois[0]:
                nodes_distance = nodes_distance.merge(df_sum, left_index=True, right_index=True)

    # Show
    print(nodes_distance.shape)
    print(nodes_distance.head(1))

In [26]:
### BREAKDOWN OF THE FUNCTION aup.pois_time (simplified, version 2)
# Development cell (only runs when test is True). Instead of merging one temporary
# dataframe per batch, every batch result is written as a column of nodes_time and
# the batch columns are reduced at the end (min for time, sum for pois count).

if test:
    ##########################################################################################
    # Step 1: NEAREST. Finds and assigns nearest node OSMID to each point of interest.

    # Defines projection for downloaded data
    pois = pois.set_crs("EPSG:4326")
    nodes = nodes.set_crs("EPSG:4326")
    edges = edges.set_crs("EPSG:4326")

    # In case there are no amenities of the type in the city, prevents it from crashing if len = 0
    if len(pois) == 0:
        nodes_time = nodes.copy()
        # Time is set to np.nan
        nodes_time['time'] = np.nan
        print(f"0 {poi_name} found. Time set to nan.")
        # Format
        nodes_time['source'] = poi_name
        nodes_time.reset_index(inplace=True)
        nodes_time = nodes_time.set_crs("EPSG:4326")
        # If requested, pois_count is set to nan.
        if count_pois[0]:
            nodes_time['pois_count'] = np.nan
            print(f"0 {poi_name} found. Pois count set to nan.")
            nodes_time = nodes_time[['osmid','time','pois_count','source','x','y','geometry']]
            #return nodes_time
        else:
            nodes_time = nodes_time[['osmid','time','source','x','y','geometry']]
            #return nodes_time

    else:
        ### Calculate nearest node for each DENUE point
        nearest = aup.find_nearest(G, nodes, pois, return_distance= True)
        nearest = nearest.set_crs("EPSG:4326")
        print(f"Found and assigned nearest node osmid to each {poi_name}.")

        ##########################################################################################
        # Step 2: DISTANCE NEAREST POI. Calculates distance from each node to its nearest point of interest.

        # --------------- 2.1 FORMAT NETWORK
        # Fill NANs with mean times.
        # Assigned back explicitly: calling fillna(inplace=True) on a selected column is a
        # chained assignment that is not guaranteed to update edges itself.
        edges[prox_measure] = edges[prox_measure].fillna(edges[prox_measure].mean())

        # --------------- 2.2 ELEMENTS NEEDED OUTSIDE THE LOOP
        # The pois are divided by batches of 200 pois and analysed using the function calculate_distance_nearest_poi
        # nodes_analysis is nodes with its index reset, used in the function.
        nodes_analysis = nodes.reset_index().copy()
        # nodes_time: Each batch of processed pois is stored as columns of this gdf.
        #             After all batches are processed, min (time) and sum (pois count) are calculated.
        nodes_time = nodes.copy()
        # nodes_distance: copy of nodes kept from version 1 of this breakdown; it is not referenced again in this cell.
        nodes_distance = nodes.copy()

        # --------------- 2.3 PROCESSING DISTANCE
        print (f"Starting time analysis for {poi_name}.")

        # If possible, analyses by batches of 200 pois.
        # NOTE(review): this simplified breakdown has no elif/else, so when len(nearest) is a
        # multiple of 250 no batch runs and nodes_time stays a plain copy of nodes.
        if len(nearest) % 250:
            batch_size = len(nearest)/200

            time_cols = []
            poiscount_cols = []

            for k in range(int(batch_size)+1):
                print(f"Starting range k = {k+1} of {int(batch_size)+1} for {poi_name}.")
                source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
                nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght=prox_measure,
                                                                         count_pois=count_pois)
                # The batch result is stored in a column named after the poi and the batch number it belongs to
                batch_time_col = 'dist_'+str(k)+poi_name
                time_cols.append(batch_time_col)
                nodes_time[batch_time_col] = nodes_distance_prep['dist_'+poi_name]

                if count_pois[0]:
                    batch_poiscount_col = f'{count_pois[1]}min_'+str(k)+poi_name
                    poiscount_cols.append(batch_poiscount_col)
                    nodes_time[batch_poiscount_col] = nodes_distance_prep[f'{count_pois[1]}min_'+poi_name]

            # Apply the min function to find the minimum time (And the sum function to find the sum of pois at given time if requested)
            final_time_col = 'dist_'+poi_name
            nodes_time[final_time_col] = nodes_time[time_cols].min(axis=1)
            output_cols = ['x','y','street_count',final_time_col]
            # The count column only exists when the count was requested, so it is only
            # selected in that case (selecting it unconditionally raised a KeyError).
            if count_pois[0]:
                final_count_col = f'{count_pois[1]}min_'+poi_name
                nodes_time[final_count_col] = nodes_time[poiscount_cols].sum(axis=1)
                output_cols.append(final_count_col)
            output_cols.append('geometry')
            nodes_time = nodes_time[output_cols]

    # Show
    print(nodes_time.shape)
    print(nodes_time.head(1))

### 02 - Test of the defined function

In [27]:
if test:
    # Show the dimensions of source_pois followed by its first row
    for preview in (source_pois.shape, source_pois.head(1)):
        print(preview)

In [28]:
if test:
    # Trial subset: only the pois whose 'code' equals 2233
    prueba = source_pois.loc[source_pois['code'] == 2233]

    # Run aup.pois_time on the subset, requesting the pois count through count_pois=(True,15)
    source_nodes_time = aup.pois_time(G, nodes, edges, prueba, source, prox_measure, count_pois=(True,15))

    # Show the dimensions of the result followed by its first row
    for preview in (source_nodes_time.shape, source_nodes_time.head(1)):
        print(preview)

# END OF FUNCTIONS REDEVELOPMENT. THE SCRIPT CONTINUES BELOW.

In [29]:
# 1.3 --------------- ANALYSE POINTS OF INTEREST (If denue, downloads)
# ------------------- This step analyses times (and count of pois at given time proximity if requested) using function aup.pois_time.
# ------------------- Output: nodes_analysis, with one time column per source (plus one count column
# ------------------- per source if count_pois[0]) merged by osmid.

# Area of interest as WKT, passed to get_denue_pois
poly_wkt = aoi.dissolve().geometry.to_wkt()[0]

i = 0 # number of sources already analysed (0 means nodes_analysis does not exist yet)
analysis_cols = [] # time (and count) column names, in analysis order

for eje in parameters.keys():
    for amenity in parameters[eje]:
        for source in parameters[eje][amenity]:

            print(f"""Analysing source {source}.""")

            analysis_cols.append(source)
            if count_pois[0]:
                count_col = f'{source}_{count_pois[1]}min'
                analysis_cols.append(count_col)

            # ANALYSIS - Select source points of interest
            source_pois = gpd.GeoDataFrame()
            for code in parameters[eje][amenity][source]:
                # The origin of the pois is decided by the first letter of the source name.
                # startswith is used instead of source[0] so an empty name reaches the error branch.
                #If source is denue:
                if source.startswith('d'):
                    print(f'--- Downloading denue source pois code {code} from db.')
                    code_pois = get_denue_pois(denue_schema,denue_table,poly_wkt,code,version)
                #If source is clues or sip:
                elif source.startswith(('c','s')):
                    print(f'--- Getting clues/sip source pois code {code} from previously downloaded.')
                    code_pois = sip_clues_gdf.loc[sip_clues_gdf['code'] == code]
                else:
                    print(f'--- Error, check parameters dicctionary.')
                    print(f'--- Sources must start with denue_, clues_ or sip_.')
                    # Explicit error instead of relying on the NameError of an undefined name
                    raise ValueError(f"Invalid source '{source}' in parameters dictionary: sources must start with denue_, clues_ or sip_.")

                source_pois = pd.concat([source_pois,code_pois])

            print(f"--- {source_pois.shape[0]} {source} pois. Analysing source pois proximity to nodes.")

            # ANALYSIS - Calculate times from nodes to source
            source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure,count_pois)
            # The time column is renamed to the plain source name
            source_nodes_time.rename(columns={'time_'+source:source},inplace=True)
            if count_pois[0]:
                source_nodes_time = source_nodes_time[['osmid',source,count_col,'x','y','geometry']]
            else:
                source_nodes_time = source_nodes_time[['osmid',source,'x','y','geometry']]

            # ANALYSIS - Merge all times in one df
            if i == 0: # For the first analysed source (keeps x, y and geometry)
                nodes_analysis = source_nodes_time.copy()
            else: # For the rest (only the new data columns are merged)
                if count_pois[0]:
                    nodes_analysis = pd.merge(nodes_analysis,source_nodes_time[['osmid',source,count_col]],on='osmid')
                else:
                    nodes_analysis = pd.merge(nodes_analysis,source_nodes_time[['osmid',source]],on='osmid')

            i += 1

            print(f"--- FINISHED source {source}. Mean city time = {nodes_analysis[source].mean()}")

# Final format for nodes
column_order = ['osmid'] + analysis_cols + ['x','y','geometry']
nodes_analysis = nodes_analysis[column_order]

print(f"""FINISHED source pois proximity to nodes analysis for {city}.""")

# Show
print(nodes_analysis.shape)
nodes_analysis.head(1)

Analysing source denue_restaurante_insitu.
--- Downloading denue source pois code 722511 from db.
--- Downloading denue source pois code 722512 from db.
--- Downloading denue source pois code 722513 from db.
--- Downloading denue source pois code 722514 from db.
--- Downloading denue source pois code 722519 from db.
--- 4926 denue_restaurante_insitu pois. Analysing source pois proximity to nodes.
Found and assigned nearest node osmid to each denue_restaurante_insitu.
Starting time analysis for denue_restaurante_insitu.
Starting range k = 1 of 25 for denue_restaurante_insitu.
Starting range k = 2 of 25 for denue_restaurante_insitu.
Starting range k = 3 of 25 for denue_restaurante_insitu.
Starting range k = 4 of 25 for denue_restaurante_insitu.
Starting range k = 5 of 25 for denue_restaurante_insitu.
Starting range k = 6 of 25 for denue_restaurante_insitu.
Starting range k = 7 of 25 for denue_restaurante_insitu.
Starting range k = 8 of 25 for denue_restaurante_insitu.
Starting range k = 

Unnamed: 0,osmid,denue_restaurante_insitu,denue_restaurante_insitu_15min,denue_restaurante_llevar,denue_restaurante_llevar_15min,denue_bares,denue_bares_15min,denue_cafe,denue_cafe_15min,sip_cancha,...,denue_cines_15min,denue_museos,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_centrocultural,denue_centrocultural_15min,x,y,geometry
0,272921360,0.58794,132.0,0.0,13.0,4.097719,18.0,5.25473,21.0,156.885198,...,0.0,7.589368,1.0,17.761478,0.0,15.222128,0.0,-102.295073,21.872876,POINT (-102.29507 21.87288)


In [30]:
############################################################### PART 2 ###############################################################
######################################################### AMENITIES ANALYSIS #########################################################
######################################################### (PREV. SCRIPT 15) ##########################################################

# 2.0 --------------- DEFINITIONS DICTIONARY
# ------------------- On script 15 a dictionary (idx_15_min) is used to calculate the times to amenities.
# ------------------- This step derives the definitions dictionary from the main parameters dictionary:
# ------------------- {eje: {amenity: [source names]}}, dropping the codes listed under each source.

definitions = {}
for eje, eje_amenities in parameters.items():
    # Each eje keeps its amenities, each one reduced to the list of its source names
    definitions[eje] = {amenity: list(amenity_sources.keys())
                        for amenity, amenity_sources in eje_amenities.items()}

In [31]:
# 2.1 --------------- FILL FOR MISSING AMENITIES
# ------------------- This step comes from script 15, where each city's nodes time data was loaded from db.
# ------------------- Even though it is no longer needed, it remains useful for avoiding crashes.
# ------------------- Definitions dictionary (previously, on script 15, called idx_15_min dictionary) is also used in the next steps.

# Gather all possible sources (flattening eje -> amenity -> sources)
all_sources = [source_name
               for eje_amenities in definitions.values()
               for amenity_sources in eje_amenities.values()
               for source_name in amenity_sources]

# Sources without a column in nodes_analysis (columns are read once, before adding any)
column_list = list(nodes_analysis.columns)
missing_sourceamenities = [s for s in all_sources if s not in column_list]

# Each missing source gets a column filled with np.nan
for s in missing_sourceamenities:
    nodes_analysis[s] = np.nan
    print(f"--- {s} source amenity is not present in {city}.")
print(f"--- Finished missing source amenities analysis. {len(missing_sourceamenities)} not present source amenities were added as np.nan columns.")

--- Finished missing source amenities analysis. 0 not present source amenities were added as np.nan columns.


In [43]:
# 2.2a -------------- AMENITIES ANALYSIS (amenities, ejes and max_time calculation)
# ------------------- This step calculates times by amenity (preescolar/primaria/etc) using the previously created
# ------------------- definitions dictionary (Previously, on script 15, called idx_15_min dictionary)
# ------------------- and using the source_weight dictionary to decide which time to use (min/max/two-method)

print("--- Starting proximity to amenities analysis by node.")

column_max_all = [] # list with all max times column names
column_max_ejes = [] # list with ejes max times column names

#Goes through each eje in dictionary:
for e in definitions.keys():

    #Appends to lists currently examined eje
    eje_max_col = 'max_'+ e.lower()
    column_max_all.append(eje_max_col)
    column_max_ejes.append(eje_max_col)
    column_max_amenities = [] # list with amenities in current eje

    #Goes through each amenity of current eje:
    for a in definitions[e].keys():

        #Appends to lists currently examined amenity:
        amenity_max_col = 'max_'+ a.lower()
        column_max_all.append(amenity_max_col)
        column_max_amenities.append(amenity_max_col)

        #Calculates time to currently examined amenity:
        #Uses source_weight dictionary to decide which time to use.
        weight = source_weight[e][a]
        if weight == 'min': # To know distance to closest source amenity.
                            # If it doesn't matter which one is closest (e.g. Alimentos).
            nodes_analysis[amenity_max_col] = nodes_analysis[definitions[e][a]].min(axis=1)

        elif weight == 'max': # To know distance to farthest source amenity.
                              # If need to know proximity to all of the options (e.g. Social)
            nodes_analysis[amenity_max_col] = nodes_analysis[definitions[e][a]].max(axis=1)

        elif weight == 'two-method': #'two-method' (for cultural amenity's sources).
                                     # See two_method_check function definition for explanation.
            # Check which sources are within 15 minutes (1 if time <= 15, else 0; nan counts as 0).
            # Vectorised comparison instead of a per-element apply.
            check_lst = []
            for s in definitions[e][a]:
                nodes_analysis[s+'_check'] = (nodes_analysis[s] <= 15).astype(int)
                check_lst.append(s+'_check')
            # Check how many sources are within 15 minutes
            nodes_analysis['check_count'] = nodes_analysis[check_lst].sum(axis=1)
            # Apply two method check, row by row.
            # NOTE(review): two_method_check is defined outside this cell; presumably it is the one
            # writing this amenity's max_ column — confirm against its definition.
            nodes_analysis = nodes_analysis.apply(two_method_check,axis='columns')
            # Drop columns used for checking
            check_lst.append('check_count')
            nodes_analysis = nodes_analysis.drop(columns=check_lst)
        else:
            # Stop with an explicit error (previously an undefined name was used to crash on purpose)
            print("--- Error in source_weight dicc.")
            print("--- Must pass 'min', 'max' or 'two-method'")
            raise ValueError(f"Invalid weight '{weight}' for amenity '{a}' (eje '{e}') in source_weight: must pass 'min', 'max' or 'two-method'.")

    #Calculates time to currently examined eje (max time of its amenities):
    nodes_analysis[eje_max_col] = nodes_analysis[column_max_amenities].max(axis=1)

# Set and calculate max time
index_column = 'max_time' # column name for maximum time data
column_max_all.append(index_column) #Adds to column_max_all list the attribute 'max_time'
nodes_analysis[index_column] = nodes_analysis[column_max_ejes].max(axis=1) #Assigns "max_time" the max time for all ejes

# Add to column_max_all list the attributes 'osmid' and 'geometry' to filter nodes_analysis.
# Looking for data of importance: columns in column_max_all list
column_max_all.append('osmid')
column_max_all.append('geometry')
nodes_timeanalysis_filter = nodes_analysis[column_max_all].copy()

print("--- Calculated proximity to amenities data by node.")

# Show
print(nodes_timeanalysis_filter.shape)
nodes_timeanalysis_filter.head(2)

--- Starting proximity to amenities analysis by node.
--- Calculated proximity to amenities data by node.
(51434, 7)


Unnamed: 0,max_entretenimiento,max_social,max_actividad física,max_cultural,max_time,osmid,geometry
0,15.222128,5.25473,5.692959,15.222128,15.222128,272921360,POINT (-102.29507 21.87288)
1,12.97992,6.647844,8.243084,12.97992,12.97992,272921393,POINT (-102.29510 21.87141)


In [44]:
# 2.2b -------------- AMENITIES COUNT ANALYSIS (amenities at given time count, optional)
# ------------------- Mirrors the previous amenities analysis. When count_pois[0] is True, the per-source
# ------------------- count columns are added up by amenity and then by eje (count_pois = (Boolean,time)).

if not count_pois[0]:
    # No count requested: the output only holds the time analysis
    nodes_analysis_filter = nodes_timeanalysis_filter.copy()

else:
    # Suffix shared by every count column, e.g. '_15min'
    count_suffix = f'_{count_pois[1]}min'
    column_count_all = []

    for eje, eje_amenities in definitions.items():
        # Count column of the eje (lowercase) is listed before the columns of its amenities
        eje_count_colname = f'{eje}{count_suffix}'.lower()
        column_count_all.append(eje_count_colname)

        column_count_amenities = []
        for amenity, amenity_sources in eje_amenities.items():
            # Count column of the amenity (lowercase)
            amenity_count_colname = f'{amenity}{count_suffix}'.lower()
            column_count_all.append(amenity_count_colname)
            column_count_amenities.append(amenity_count_colname)

            # Count columns of the amenity's sources: only summed, not part of the final output
            column_count_sources = [f'{source_name}{count_suffix}' for source_name in amenity_sources]
            # Amenity count = sum of its sources counts, by node
            nodes_analysis[amenity_count_colname] = nodes_analysis[column_count_sources].sum(axis=1)

        # Eje count = sum of its amenities counts, by node
        nodes_analysis[eje_count_colname] = nodes_analysis[column_count_amenities].sum(axis=1)

    # Keep the count columns plus osmid and join them to the time analysis
    column_count_all.append('osmid')
    nodes_countanalysis_filter = nodes_analysis[column_count_all]
    nodes_analysis_filter = pd.merge(nodes_timeanalysis_filter,nodes_countanalysis_filter,on='osmid')

# Show
print(nodes_analysis_filter.shape)
nodes_analysis_filter.head(2)

(51434, 11)


Unnamed: 0,max_entretenimiento,max_social,max_actividad física,max_cultural,max_time,osmid,geometry,entretenimiento_15min,social_15min,actividad física_15min,cultural_15min
0,15.222128,5.25473,5.692959,15.222128,15.222128,272921360,POINT (-102.29507 21.87288),188.0,184.0,3.0,1.0
1,12.97992,6.647844,8.243084,12.97992,12.97992,272921393,POINT (-102.29510 21.87141),209.0,204.0,3.0,2.0


In [46]:
# 2.3 --------------- POPULATION DATA
# ------------------- This step (optional, only when pop_output is True) loads hexagons with population data.
######################################################################################################################################
# ------------------- The final code of this step must be reviewed according to the new pop data names in the db.
# ------------------- NOTE(review): res_list is forced to [8] below; presumably the pop table
# ------------------- (hex_bins_pop_2020) only holds resolution 8 — confirm.
######################################################################################################################################

if pop_output:
    res_list = [8]
    print("--- Set res_list to 8 only. pop_output currently only generates res 8 data.")

    # hex_socio_gdf accumulates the pop hexagons of every processed resolution
    hex_socio_gdf = gpd.GeoDataFrame()
    for res in res_list:
        # Download pop hexagons for the city area
        hex_pop_res = aup.gdf_from_polygon(aoi, pop_schema, pop_table, geom_col="geometry").set_crs("EPSG:4326")
        print(f"--- Downloaded pop gdf res {res}.")

        # Format: generic id column name plus a column with the resolution
        hex_pop_res = hex_pop_res.rename(columns={f'hex_id_{res}':'hex_id'})
        hex_pop_res['res'] = res

        # Calculate fields of interest on a copy projected to EPSG:6372:
        # density = pobtot over the hexagon area divided by 10000
        hex_pop_res_tmp = hex_pop_res.to_crs("EPSG:6372")
        area_ha = hex_pop_res_tmp.area / 10000
        hex_pop_res_tmp['dens_pob_ha'] = hex_pop_res_tmp['pobtot'] / area_ha

        # Merge calculated fields to hex_pop_res gdf
        hex_pop_res_tmp = hex_pop_res_tmp[['hex_id','dens_pob_ha']]
        hex_pop_res = pd.merge(hex_pop_res, hex_pop_res_tmp, on='hex_id')

        # Save fields of interest for current res
        pop_fields = ['pobtot','dens_pob_ha']
        keep_cols = ['hex_id','res'] + pop_fields + ['geometry']
        hex_socio_gdf = pd.concat([hex_socio_gdf, hex_pop_res[keep_cols]])
        print(f"--- Saved pop gdf res {res}.")

    # Show
    print(hex_socio_gdf.shape)
    print(hex_socio_gdf.head(1))

In [47]:
# 2.4 --------------- GROUP DATA BY HEX
# ------------------- This step groups nodes data by hexagon, one resolution at a time.
# ------------------- If pop_output, uses the previously created hexes with pop data. Else, loads the hexgrid from the db.
# ------------------- Output: hex_idx, with the grouped data of every resolution stacked (columns 'hex_id' and 'res').

# Prevent crashing from trying not allowed resolutions.
# Resolutions allowed by each proximity analysis version.
allowed_res_by_version = {1: [8,9],
                          2: [8,9,10,11]}
checked_res_list = []
# For any other version nothing is checked and res_list ends up empty.
if version in allowed_res_by_version:
    allowed_res = allowed_res_by_version[version]
    for res in res_list:
        if res in allowed_res:
            checked_res_list.append(res)
        else:
            print(f"--- Resolution {res} removed from res_list. This res is not allowed in version {version}.")
res_list = checked_res_list.copy()

hex_idx = gpd.GeoDataFrame()
for res in res_list:
    # Load hexgrid
    # If pop_output is true, loads previously created hexgrid with pop data
    if pop_output:
        # Function group_by_hex_mean requires ID to include resolution.
        # rename returns a new gdf, so the rows selected from hex_socio_gdf are not modified in place.
        hex_pop = hex_socio_gdf.loc[hex_socio_gdf['res'] == res].rename(columns={'hex_id':f'hex_id_{res}'})
        # Create hex_tmp (id and geometry)
        hex_pop = hex_pop.to_crs("EPSG:4326")
        hex_tmp = hex_pop[[f'hex_id_{res}','geometry']].copy()
        print(f"--- Loaded pop hexgrid of resolution {res}.")

    # If pop_output is false, loads hexgrid from db
    else:
        if version == 1:
            hex_table = f'hexgrid_{res}_city'
            query = f"SELECT * FROM {hex_schema}.{hex_table} WHERE \"metropolis\" LIKE \'{city}\'"
        elif version == 2:
            hex_table = f'hexgrid_{res}_city_2020'
            query = f"SELECT * FROM {hex_schema}.{hex_table} WHERE \"city\" LIKE \'{city}\'"
        else:
            # Stop with an explicit error (previously an undefined name was used to crash on purpose)
            print("--- Error in specified proximity analysis version.")
            print("--- Must pass integers 1 or 2.")
            raise ValueError(f"Invalid proximity analysis version {version!r}: must pass integers 1 or 2.")

        # Load hexgrid (which already has ID_res)
        hexgrid = aup.gdf_from_query(query, geometry_col='geometry')
        # Create hex_tmp
        hex_tmp = hexgrid.set_crs("EPSG:4326")
        hex_tmp = hex_tmp[[f'hex_id_{res}','geometry']].copy()
        print(f"--- Loaded hexgrid of resolution {res}.")

    # Group time data by hex
    hex_res_idx = aup.group_by_hex_mean(nodes_analysis_filter, hex_tmp, res, index_column)
    # Only hexes whose index_column value is greater than 0 are kept
    # NOTE(review): presumably this discards hexes without nodes — confirm against group_by_hex_mean.
    hex_res_idx = hex_res_idx.loc[hex_res_idx[index_column]>0].copy()
    print(f"--- Grouped nodes data by hexagons res {res}.")

    # If pop_output is true, add pop data
    if pop_output:
        pop_list = pop_fields.copy()
        pop_list.append(f'hex_id_{res}')
        hex_res_pop = pd.merge(hex_res_idx, hex_pop[pop_list], on=f'hex_id_{res}')
    else:
        hex_res_pop = hex_res_idx.copy()

    # After function group_by_hex_mean we can remove res from ID and set it as a column
    hex_res_pop = hex_res_pop.rename(columns={f'hex_id_{res}':'hex_id'})
    hex_res_pop['res'] = res

    # Finally, add to hex_idx each resolution processing
    hex_idx = pd.concat([hex_idx,hex_res_pop])
    print(f"--- Saved grouped data by hexagons res {res}.")

# Show
print(hex_idx.shape)
hex_idx.head(1)

--- Resolution 7 removed from res_list. This res is not allowed in version 2.
--- Resolution 12 removed from res_list. This res is not allowed in version 2.
--- Loaded hexgrid of resolution 8.
--- Grouped nodes data by hexagons res 8.
--- Saved grouped data by hexagons res 8.
--- Loaded hexgrid of resolution 9.
--- Grouped nodes data by hexagons res 9.
--- Saved grouped data by hexagons res 9.
--- Loaded hexgrid of resolution 10.
--- Grouped nodes data by hexagons res 10.
--- Saved grouped data by hexagons res 10.
--- Loaded hexgrid of resolution 11.
--- Grouped nodes data by hexagons res 11.
--- Saved grouped data by hexagons res 11.
(58255, 12)


Unnamed: 0,hex_id,geometry,max_entretenimiento,max_social,max_actividad física,max_cultural,max_time,entretenimiento_15min,social_15min,actividad física_15min,cultural_15min,res
0,88498e3289fffff,"POLYGON ((-102.16756 21.82626, -102.16297 21.8...",59.692444,59.692444,37.552347,39.485914,59.692444,0.0,0.0,0.0,0.0,8


In [48]:
############################################################### PART 3 ###############################################################
#################################################### RECALCULATION AND FINAL DATA ####################################################
#################################################### (PREV. SCRIPT 15 + NEW DATA) ####################################################

# 3.1 --------------- RE-CALCULATE MAX TIMES BY HEXAGON
# ------------------- For every eje, its max time column in hex_idx is overwritten
# ------------------- with the row-wise max of the max time columns of its amenities.

for e, eje_amenities in definitions.items():
    # Max time column names of the amenities in current eje
    column_max_amenities = ['max_'+ a.lower() for a in eje_amenities.keys()]
    # Time to current eje = max time of its amenities
    hex_idx['max_'+ e.lower()] = hex_idx[column_max_amenities].max(axis=1)

print('--- Finished recalculating ejes times in hexagons.')

# Show
print(hex_idx.shape)
hex_idx.head(1)

--- Finished recalculating ejes times in hexagons.
(58255, 12)


Unnamed: 0,hex_id,geometry,max_entretenimiento,max_social,max_actividad física,max_cultural,max_time,entretenimiento_15min,social_15min,actividad física_15min,cultural_15min,res
0,88498e3289fffff,"POLYGON ((-102.16756 21.82626, -102.16297 21.8...",59.692444,59.692444,37.552347,39.485914,59.692444,0.0,0.0,0.0,0.0,8


In [49]:
# 3.2 --------------- CALCULATE AND ADD ADDITIONAL AND FINAL DATA
# ------------------- This step adds mean, median, city and idx data to each hex

#Define idx function
def apply_sigmoidal(x):
    """Turn one time value into its idx score.

    Returns:
        -1 when x is exactly -1 (the value is passed through untouched),
        0 when x is greater than 1000,
        otherwise aup.sigmoidal_function(0.1464814753435666, x, 30).

    NOTE(review): -1 looks like a "no data" marker, and 0.1464... / 30 look like
    the curve's steepness and midpoint -- confirm against aup.sigmoidal_function.
    """
    # Guard clauses for the two special cases that never reach the sigmoid
    if x == -1:
        return -1
    if x > 1000:
        return 0
    return aup.sigmoidal_function(0.1464814753435666, x, 30)

# Create the all-amenities list (previously amenities were listed by eje):
# every column of column_max_all that is not an eje column...
max_amenities_cols = [col for col in column_max_all if col not in column_max_ejes]
# ...minus the columns that are not amenity times. list.remove raises ValueError
# if one of them is missing, which flags an unexpected column_max_all early.
for non_amenity_col in ('max_time', 'osmid', 'geometry'):
    max_amenities_cols.remove(non_amenity_col)

# Create the idx columns (one per amenity) and a list with their names
idx_amenities_cols = []
for ac in max_amenities_cols:
    # Swap only the first 'max' (the column prefix) for 'idx'. Without count=1,
    # str.replace would also rewrite any 'max' inside the amenity name itself
    # (e.g. 'max_climax' -> 'idx_cliidx').
    idx_col = ac.replace('max', 'idx', 1)
    hex_idx[idx_col] = hex_idx[ac].apply(apply_sigmoidal)
    idx_amenities_cols.append(idx_col)

# Add final data
# Overall time: row-wise max across the eje columns
hex_idx[index_column] = hex_idx[column_max_ejes].max(axis=1)
# Mean and median time across the amenity columns
hex_idx['mean_time'] = hex_idx[max_amenities_cols].mean(axis=1)
hex_idx['median_time'] = hex_idx[max_amenities_cols].median(axis=1)
# Sum of the per-amenity idx scores
hex_idx['idx_sum'] = hex_idx[idx_amenities_cols].sum(axis=1)
hex_idx['city'] = city

print('--- Finished calculating index, mean, median and max time.')

# Show
print(hex_idx.shape)
hex_idx.head(1)

--- Finished calculating index, mean, median and max time.
(58255, 19)


Unnamed: 0,hex_id,geometry,max_entretenimiento,max_social,max_actividad física,max_cultural,max_time,entretenimiento_15min,social_15min,actividad física_15min,cultural_15min,res,idx_social,idx_actividad física,idx_cultural,mean_time,median_time,idx_sum,city
0,88498e3289fffff,"POLYGON ((-102.16756 21.82626, -102.16297 21.8...",59.692444,59.692444,37.552347,39.485914,59.692444,0.0,0.0,0.0,0.0,8,0.01275,0.248565,0.199486,45.576902,39.485914,0.460801,Aguascalientes


In [50]:
# 3.3 --------------- FINAL FORMAT
# ------------------- Builds the ordered list of output columns one group
# ------------------- at a time, then uses it to filter/reorder hex_idx.

# Group 1 - ID and geometry
final_column_ordered_list = ['hex_id', 'res', 'geometry']

# Group 2 - max_ejes and max_amenities columns
# (column_max_all without max_time, osmid and geometry)
column_max_ejes_amenities = column_max_all.copy()
for dropped_col in ('max_time', 'osmid', 'geometry'):
    column_max_ejes_amenities.remove(dropped_col)
final_column_ordered_list += column_max_ejes_amenities

# Group 3 - count pois columns, only if requested
# (column_count_all without osmid)
if count_pois[0]:
    third_elements = column_count_all.copy()
    third_elements.remove("osmid")
    final_column_ordered_list += third_elements

# Group 4 - idx columns, as listed in idx_amenities_cols
final_column_ordered_list += idx_amenities_cols

# Group 5 - final mean, median, max and idx
fifth_elements = ['mean_time', 'median_time', 'max_time', 'idx_sum']
final_column_ordered_list += fifth_elements

# Group 6 - pop data, only if pop is calculated
if pop_output:
    final_column_ordered_list = final_column_ordered_list + pop_fields

# Last element - city data
final_column_ordered_list += ['city']

# Keep only the listed columns, in the listed order
hex_idx_city = hex_idx[final_column_ordered_list]

print('--- Finished final format for gdf.')

# Show
print(hex_idx_city.shape)
hex_idx_city.head(1)

--- Finished final format for gdf.
(58255, 19)


Unnamed: 0,hex_id,res,geometry,max_entretenimiento,max_social,max_actividad física,max_cultural,entretenimiento_15min,social_15min,actividad física_15min,cultural_15min,idx_social,idx_actividad física,idx_cultural,mean_time,median_time,max_time,idx_sum,city
0,88498e3289fffff,8,"POLYGON ((-102.16756 21.82626, -102.16297 21.8...",59.692444,59.692444,37.552347,39.485914,0.0,0.0,0.0,0.0,0.01275,0.248565,0.199486,45.576902,39.485914,59.692444,0.460801,Aguascalientes


In [54]:
# Inspect the max-time columns (column_max_all without max_time, osmid and geometry)
# that step 3.3 placed in the final column order.
column_max_ejes_amenities

['max_entretenimiento', 'max_social', 'max_actividad física', 'max_cultural']