# Script 21 - (Updated up to date 2024-01-30) adjusted to project: Volvo

This notebook is based on Script 21 (Most recent proximity script), created on 2024-01-30.

__The notebook differs from regular proxmity analysis in:__
* Area of Interest: Guadalajara only.
* Points of Interest (Eje-amenidad)
    * Escuelas: Preescolar, primaria, secundaria, *__guarderías__*
    * Salud: CLUES, *__farmacias__*
    * *__Parques: Parques de Guadalajara__*
    * *__Equip. Deportivos:__* Canchas, unidades deportivas
    * *__Cultural:__* Cines, museos, bibliotecas
    * *__Financieros: Bancos__*
 
* __Processing:__
    * Por amenidad:
        * Proximidad (tiempo)
        * Conteo de amenidades a 15 minutos
    * Por eje:
        * Tiempo mínimo a amenidades (al más cercano)
        * Suma del conteo de las amenidades a 15 minutos   

* Output: Generates res9
* Output name: _______________

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

  ox.config(


## Required script 21 data

In [2]:
####################################################################################################################################
# ADAPTATION
# Version option (1 or 2) was removed because it will run bibliotecas (Version 2) but look for min time (Version 1), not two-method (Version 2).
####################################################################################################################################

city = 'Guadalajara'

# ---------------------------- BASE DATA REQUIRED ----------------------------
# Area of interest (city)
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2020' #'metro_gdf_2015' or 'metro_gdf_2020'
# Network data (nodes and edges table for distance analysis,
# also used to generate the network G with which the nearest OSMID is assigned to each poi)
network_schema = 'osmnx'
nodes_table = 'nodes' #'nodes' or 'nodes_23_point'
edges_table = 'edges_speed' ################################################################################################## PENDIENTE
# Points of interest - DENUE
denue_schema = 'denue'
denue_table = 'denue_23_point' #'denue_2020' or 'denue_23_point'
# Points of interest - CLUES
clues_schema = 'denue'
clues_table = 'clues_23_point' #'clues' or 'clues_23_point'
# Points of interest - SIP
sip_schema = 'denue'
sip_table = 'sip_23_point' #'sip_2020' or 'sip_23_point'
# Points of interest - Espacio publico (Parques) ### Exclusive project Volvo
parques_schema = 'espacios_publicos' 
parques_table = 'ep_amg'
# Hexgrid
hex_schema = 'hexgrid'
# Population data
pop_schema = 'censo'
pop_table = 'hex_censo_mza_2020_res9'

# ---------------------------- ANALYSIS AND OUTPUT OPTIONS ----------------------------
# Network distance method used in function pois_time. (If length, assumes pedestrian speed of 4km/hr.)
prox_measure = 'time_min' # Must pass 'length' or 'time_min'

# Count available amenities at given time proximity (minutes)?
count_pois = (True,15) # Must pass a tupple containing a boolean (True or False) and time proximity of interest in minutes (Boolean,time)

# If pop_output = True, loads pop data from pop_schema and pop_table.
# If pop_output = False, loads empty hexgrid.
pop_output = True ############################################################## For Volvo, we agreed pop_output=True using hex_censo_mza_2020_res9

# Hexagon resolutions of output
res_list = [9] 

# SAVING
# Save final output to db?
save = False
save_schema = 'prox_analysis'
save_table = 'proximityanalysis_24_ageb_hex'
# Local save? (Runs Aguascalientes for tests)
local_save = True
local_save_dir = '../../../data/external/temporal_fromjupyter/volvo/proxanalysis_volvo_2024_hex9.gpkg'

In [14]:
####################################################################################################################################
# ADAPTATION
# Adjusted diccs for project Volvo.
####################################################################################################################################

parameters = {'Escuelas':{'Guarderías':{'denue_guarderias':[624411, 624412]},
                          'Preescolar':{'denue_preescolar':[611111, 611112]},
                          'Primaria':{'denue_primaria':[611121, 611122]},
                          'Secundaria':{'denue_secundaria':[611131, 611132]}
                         },
              'Salud':{'Primer nivel':{'clues_primer_nivel':[8610]},
                       'Farmacias':{'denue_farmacias':[474111,464112]} ########## Volvo: 464111 Farmacias sin minisúper, 464112 Farmacias con minisúper
                       },
              'Parques':{'Area verde':{'odc_parques':[9321]} ########## Volvo: AGREGAR DE BD, 
                                                          ########## Se le asignó el code 9321, correspondiente a 'Activities of amusement parks and theme parks'
                                                          ########## Fuente del code elegido: International Standard Industrial Classification of all Economic Activities, Rev.4
                                                          ########## https://unstats.un.org/unsd/publication/seriesm/seriesm_4rev4e.pdf
                        },
              'Equipamiento deportivo':{'Canchas':{'sip_cancha':[93110]},
                                        'Unidad deportiva':{'sip_unidad_deportiva':[93111]}
                                       },
              'Cultural':{'Cines':{'denue_cines':[512130]},
                          'Museos':{'denue_museos':[712111, 712112]},
                          'Bibliotecas':{'denue_bibliotecas':[519121,519122]}
                         },
              'Financiero':{'Bancos':{'denue_bancos':[522110]} #################################################### Volvo: 522110 Banca Múltiple (Bancos y cajeros)
                           }
             }

source_weight = {'Escuelas':{'Guarderías':'min', #There is only one source, no effect.
                             'Preescolar':'min', #There is only one source, no effect.
                             'Primaria':'min', #There is only one source, no effect.
                             'Secundaria':'min' #There is only one source, no effect.
                            },
                 'Salud':{'Primer nivel':'min',#There is only one source, no effect.
                          'Farmacias':'min'#There is only one source, no effect.
                         },
                 'Parques':{'Area verde':'min'#There is only one source, no effect.
                           },
                 'Equipamiento deportivo':{'Canchas':'min',#There is only one source, no effect.
                                           'Unidad deportiva':'min'#There is only one source, no effect.
                                          },
                 'Cultural':{'Cines':'min',#There is only one source, no effect.
                             'Museos':'min',#There is only one source, no effect.
                             'Bibliotecas':'min'#There is only one source, no effect.
                            },
                 'Financiero':{'Bancos':'min'
                              }
                 }

In [4]:
####################################################################################################################################
# ADAPTATION
# Simplified version, does not filter centro cultural nor dif because it is not used in project Volvo.
####################################################################################################################################
def get_denue_pois(denue_schema,denue_table,poly_wkt,code):
    # This function downloads the codigo_act denue poi requested for the analysis.

    # Download denue pois
    query = f"SELECT * FROM {denue_schema}.{denue_table} WHERE (ST_Intersects(geometry, \'SRID=4326;{poly_wkt}\')) AND (\"codigo_act\" = \'{code}\')"
    code_pois = aup.gdf_from_query(query, geometry_col='geometry')

    # Format denue pois
    code_pois = code_pois[['codigo_act', 'geometry']]
    code_pois = code_pois.rename(columns={'codigo_act':'code'})
    code_pois['code'] = code_pois['code'].astype('int64')

    return code_pois

In [5]:
####################################################################################################################################
# NEW
# Project Volvo includes 'parques', which are in OdC's DB
####################################################################################################################################
def get_parques_pois(parques_schema,parques_table,code):
    # This function creates parques points of interest out of vertices of parques found in db (schema>table)

    # Download parques pois
    query = f"SELECT * FROM {parques_schema}.{parques_table}"
    gdf = aup.gdf_from_query(query, geometry_col='geometry')
    gdf = gdf.to_crs("EPSG:4326")

    # Get vertices coordinates
    gdf_coords = gdf.geometry.get_coordinates()

    # Merge back with gdf containing data
    gdf_coords_data = pd.merge(gdf_coords,gdf,left_index=True,right_index=True)

    # Drop poly geometry 
    df_coords_data = gdf_coords_data.drop(columns=['geometry'])
    # Set points geometry
    gdf_2 = gpd.GeoDataFrame(df_coords_data, 
                             geometry=gpd.points_from_xy(df_coords_data.x, df_coords_data.y),
                             crs='EPSG:4326')
    # Format
    gdf_2.drop(columns=['x','y'],inplace=True)
    gdf_2.reset_index(inplace=True)
    gdf_2.rename(columns={'index':'polygon_id'},inplace=True)

    # Project Volvo filter
    # Tipos
    tipos = ['Parque','Espacio verde vecinal', 'Plaza', 'Área natural', 'Área natural protegida']
    gdf_f1 = gdf_2.loc[gdf_2.Tipo.isin(tipos)]
    # Usos
    gdf_f2 = gdf_f1.loc[gdf_f1.Uso == 'Uso recreativo']

    # Proxanalysis filter
    #code_pois = gdf_f2.set_crs("EPSG:4326")
    gdf_f2['code'] = code
    code_pois = gdf_f2[['polygon_id','code','geometry']]
    code_pois['code'] = code_pois['code'].astype('int64')

    code_pois = code_pois.drop_duplicates()

    return code_pois

## Script 21

### Part 1.1 and 1.2 - AOI, G, nodes, edges, pois (clues and sip)

In [6]:
############################################################### PART 1 ###############################################################
#################################################### FIND NODES PROXIMITY TO POIS ####################################################
###################################################### (PREV. SCRIPT 01 + 02) ########################################################

# 1.1 --------------- BASE DATA FOR POIS-NODES ANALYSIS
# ------------------- This first step downloads the area of interest and network used to measure distance.

# Download area of interest
query = f"SELECT * FROM {metro_schema}.{metro_table} WHERE \"city\" LIKE \'{city}\'"
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')
mun_gdf = mun_gdf.set_crs("EPSG:4326")
aoi = mun_gdf.dissolve()

# Download Network used to calculate nearest note to each poi
G, nodes, edges = aup.graph_from_hippo(aoi, schema=network_schema, edges_folder=edges_table, nodes_folder=nodes_table)

# Show
print(nodes.shape)
print(edges.shape)

(184338, 4)
(441463, 19)


In [7]:
# 1.2 --------------- DOWNLOAD POINTS OF INTEREST (clues and sip pois, not denue)
# ------------------- This step downloads SIP and CLUES points of interest (denue pois are downloaded later.)
sip_clues_gdf = gpd.GeoDataFrame()

# CLUES (Salud)
aup.log(f"--- Downloading CLUES pois for {city}.")
# Download
clues_gdf = aup.gdf_from_polygon(aoi, clues_schema, clues_table, geom_col="geometry")
# Filter
clues_pois = clues_gdf.loc[clues_gdf['nivel_atencion'] == 'PRIMER NIVEL']
del clues_gdf
# Format
clues_pois.loc[:,'code'] = 8610
clues_pois = clues_pois[['code','geometry']]
# Save to pois_tmp
sip_clues_gdf = pd.concat([sip_clues_gdf,clues_pois])
del clues_pois

# SIP (Marco geoestadistico)
aup.log(f"--- Downloading SIP pois for {city}.")
# Download
sip_gdf = aup.gdf_from_polygon(aoi, sip_schema, sip_table, geom_col="geometry")
sip_amenities = {'GEOGRAFICO':['Mercado','Plaza'], 
                 'TIPO':['Cancha','Unidad Deportiva','Áreas Verdes','Jardín','Parque']}
# Filter - SIP pois of interest
sip_amenities_codes = {'Mercado':4721, #sip_mercado
                       'Cancha':93110, #sip_cancha
                       'Unidad Deportiva':93111, #sip_unidad_deportiva 
                       'Áreas Verdes':9321, #sip_espacio_publico 
                       'Jardín':9321, #sip_espacio_publico
                       'Parque':9321, #sip_espacio_publico
                       'Plaza':9321 #sip_espacio_publico
                        }
# Filter - Iterate over sip_amenities and filter sip gdf
sip_pois = gpd.GeoDataFrame()
for col in sip_amenities:
    for amenity in sip_amenities[col]:
        sip_tmp = sip_gdf.loc[sip_gdf[col] == amenity]
        sip_tmp.loc[:,'code'] = sip_amenities_codes[amenity]
        sip_pois = pd.concat([sip_pois,sip_tmp])
del sip_gdf
# Format
sip_pois = sip_pois[['code','geometry']]
# Save to pois_tmp
sip_clues_gdf = pd.concat([sip_clues_gdf,sip_pois])
del sip_pois

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

### Part 1.3a - nodes analysis for all amenities except green spaces

In [8]:
####################################################################################################################################
# ADAPTATION
# Added option for source being odc (In order skip it since for parks it will be slightly different).
# Removed 'version' from get_denue_pois
# Output renamed (nodes_analysis_1), will be concatenated with nodes_analysis_2 (Greenspace analysis)
####################################################################################################################################

# 1.3a --------------- ANALYSE POINTS OF INTEREST (If denue, downloads)
# ------------------- This step analysis times (and count of pois at given time proximity if requested) using function aup.pois_time.

poly_wkt = aoi.dissolve().geometry.to_wkt()[0]

i = 0
analysis_cols = []

for eje in parameters.keys():
    for amenity in parameters[eje]:
        for source in parameters[eje][amenity]:
            if source[0] == 'o':
                print(f"---SKIPPING {source}.")
                continue
            
            print(f"""---Analysing source {source}.""")
            
            analysis_cols.append(source)
            if count_pois[0]:
                count_col = f'{source}_{count_pois[1]}min'
                analysis_cols.append(count_col)
            
            # ANALYSIS - Select source points of interest
            source_pois = gpd.GeoDataFrame()
            for code in parameters[eje][amenity][source]:
                #If source is denue:
                if source[0] == 'd':
                    print(f'--- Downloading denue source pois code {code} from db.')
                    code_pois = get_denue_pois(denue_schema,denue_table,poly_wkt,code)
                #If source is clues or sip:
                elif source[0] == 'c' or source[0] == 's':
                    print(f'--- Getting clues/sip source pois code {code} from previously downloaded.')
                    code_pois = sip_clues_gdf.loc[sip_clues_gdf['code'] == code]
                else:
                    print(f'--- Error, check parameters dicctionary.')
                    print(f'--- Sources must start with denue_, clues_, odc_ or sip_.')
                    intended_crash
                    
                source_pois = pd.concat([source_pois,code_pois])

            print(f"--- {source_pois.shape[0]} {source} pois. Analysing source pois proximity to nodes.")
            
            # ANALYSIS - Calculate times from nodes to source
            source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure,count_pois)
            source_nodes_time.rename(columns={'time_'+source:source},inplace=True)
            if count_pois[0]:
                source_nodes_time = source_nodes_time[['osmid',source,count_col,'x','y','geometry']]
            else:
                source_nodes_time = source_nodes_time[['osmid',source,'x','y','geometry']]

            # ANALYSIS - Merge all times in one df
            if i == 0: # For the first analysed source
                nodes_analysis = source_nodes_time.copy()
            else: # For the rest
                if count_pois[0]:
                    nodes_analysis = pd.merge(nodes_analysis,source_nodes_time[['osmid',source,count_col]],on='osmid')
                else:
                    nodes_analysis = pd.merge(nodes_analysis,source_nodes_time[['osmid',source]],on='osmid')

            i = i+1

            print(f"--- FINISHED source {source}. Mean city time = {nodes_analysis[source].mean()}")
        
# Final format for nodes
column_order = ['osmid'] + analysis_cols + ['x','y','geometry']
nodes_analysis_1 = nodes_analysis[column_order]

print(f"""FINISHED source pois proximity to nodes analysis for {city}.""")

# Show
print(nodes_analysis_1.shape)
nodes_analysis_1.head(1)

---Analysing source denue_guarderias.
--- Downloading denue source pois code 624411 from db.
--- Downloading denue source pois code 624412 from db.
--- 427 denue_guarderias pois. Analysing source pois proximity to nodes.
Found and assigned nearest node osmid to each denue_guarderias.
Starting time analysis for denue_guarderias.
Starting range k = 1 of 3 for denue_guarderias.
Starting range k = 2 of 3 for denue_guarderias.
Starting range k = 3 of 3 for denue_guarderias.
Finished time analysis for denue_guarderias.
--- FINISHED source denue_guarderias. Mean city time = 29.733439637890502
---Analysing source denue_preescolar.
--- Downloading denue source pois code 611111 from db.
--- Downloading denue source pois code 611112 from db.
--- 1676 denue_preescolar pois. Analysing source pois proximity to nodes.
Found and assigned nearest node osmid to each denue_preescolar.
Starting time analysis for denue_preescolar.
Starting range k = 1 of 9 for denue_preescolar.
Starting range k = 2 of 9 fo

Unnamed: 0,osmid,denue_guarderias,denue_guarderias_15min,denue_preescolar,denue_preescolar_15min,denue_primaria,denue_primaria_15min,denue_secundaria,denue_secundaria_15min,clues_primer_nivel,...,denue_cines_15min,denue_museos,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_bancos,denue_bancos_15min,x,y,geometry
0,28751344,13.128475,1.0,1.80726,5.0,7.275387,8.0,3.88002,2.0,7.275387,...,0.0,84.86144,0.0,15.68526,0.0,12.983204,2.0,-103.306309,20.711533,POINT (-103.30631 20.71153)


### Part 1.3b - nodes analysis for green spaces

In [9]:
####################################################################################################################################
# 1.3b ADAPTATION
# Parks pois are vertices from polygons gotten from db.
# Since one polygon of interest will have several points of interest (vertices), any osmnx node could get assigned several pois even if they all belong to the same park.
# Therefore, we cannot use function aup.pois_time.
# This step unfolds function aup.pois_time so that if the previous case happens, the node gets assigned to the closest poi of the polygon only. (Once)
####################################################################################################################################

# 1.3 --------------- ANALYSE POINTS OF INTEREST (If denue, downloads)
# ------------------- This step analysis times (and count of pois at given time proximity if requested) using function aup.pois_time.

# PREP. FOR ANALYSIS
i = 0
# PREP. FOR ANALYSIS - List of columns used to deliver final format
analysis_cols = []

eje = 'Parques'
for amenity in parameters[eje]:
    for source in parameters[eje][amenity]:

        analysis_cols.append(source)
        # PREP. FOR ANALYSIS - If counting pois, append corresponding column (count_col formated example: 'denue_preescolar_15min')
        if count_pois[0]:
            count_col = f'{source}_{count_pois[1]}min'
            analysis_cols.append(count_col)
        
        # ANALYSIS - Select source points of interest (concats all data of current source's codes in source_pois)
        source_pois = gpd.GeoDataFrame()
        for code in parameters[eje][amenity][source]:
            if source[0] == 'o':
                print(f'--- Loading source pois code {code} from db.')
                code_pois = get_parques_pois(parques_schema,parques_table,code)
                source_pois = pd.concat([source_pois,code_pois])
                print(f'--- Loaded and filtered source pois code {code} from db.')

        print(f"--- {source_pois.shape[0]} {source} pois. Analysing source pois proximity to nodes.")

        # ANALYSIS - Calculate time data from nodes to source
        ####################################################################################################################################################################################
        # FUNCTION aup.pois_time
        # Calculates times from nodes to source (source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure,count_pois))
        # analysis function being used: def pois_time(G, nodes, edges, pois, poi_name, prox_measure,count_pois=(False,0)):
        pois = source_pois.copy()
        poi_name = source
        ##########################################################################################
        # STEP 1: NEAREST. 
        # Finds and assigns nearest node OSMID to each point of interest.
           
        # Defines projection for downloaded data
        pois = pois.set_crs("EPSG:4326")
        nodes = nodes.set_crs("EPSG:4326")
        edges = edges.set_crs("EPSG:4326")
        
        # In case there are no amenities of the type in the city, prevents it from crashing if len = 0
        if len(pois) == 0:
            nodes_time = nodes.copy()
    
            # Format
            nodes_time.reset_index(inplace=True)
            nodes_time = nodes_time.set_crs("EPSG:4326")
    
            # As no amenities were found, output columns are set to nan.
            nodes_time['time_'+poi_name] = np.nan # Time is set to np.nan.
            print(f"0 {poi_name} found. Time set to np.nan for all nodes.")
            if count_pois[0]: 
                nodes_time[f'{poi_name}_{count_pois[1]}min'] = np.nan # If requested pois_count, value is set to np.nan.
                print(f"0 {poi_name} found. Pois count set to nan for all nodes.")
                nodes_time = nodes_time[['osmid','time_'+poi_name,f'{poi_name}_{count_pois[1]}min','x','y','geometry']]
                #return nodes_time
            else:
                nodes_time = nodes_time[['osmid','time_'+poi_name,'x','y','geometry']]
                #return nodes_time
        
        else:
            ### Find nearest osmnx node for each DENUE point.
            nearest = aup.find_nearest(G, nodes, pois, return_distance= True)
            nearest = nearest.set_crs("EPSG:4326")
            print(f"Found and assigned nearest node osmid to each {poi_name}.")
                
            ##########################################################################################
            # ADDITIONAL STEP
            # Filters for the minimum distance (distance_node) from osmid to each polygon vertex
    
            # Group by node (osmid) and polygon (green space) considering only the closest vertex (min)
            groupby = nearest.groupby(['osmid','polygon_id']).agg({'distance_node':np.min})
    
            # Merges back with nodes geometry
            groupby.reset_index(inplace=True)
            geom_gdf = nodes.reset_index()[['osmid','geometry']]
            nearest = pd.merge(groupby,geom_gdf,on='osmid',how='left')
    
            # Deletes polygon_id
            nearest.drop(columns=['polygon_id'],inplace=True)
            
            ##########################################################################################
            # STEP 2: DISTANCE NEAREST POI. 
            # Calculates distance from each node to its nearest point of interest using previously assigned nearest node.
            
            # --------------- 2.1 FORMAT NETWORK DATA
            # Fill NANs with mean times (prevents crash)
            edges[prox_measure].fillna(edges[prox_measure].mean(),inplace=True)
            # If prox_measure = 'length', calculates time_min assuming walking speed = 4km/hr
            if prox_measure == 'length':
                edges['time_min'] = (edges['length']*60)/4000
            
            # --------------- 2.2 ELEMENTS NEEDED OUTSIDE THE ANALYSIS LOOP
            # The pois are divided by batches of 200 or 250 pois and analysed using the function calculate_distance_nearest_poi.
            # nodes_analysis is a nodes gdf (index reseted) used in the function aup.calculate_distance_nearest_poi.
            nodes_analysis = nodes.reset_index().copy()
            # nodes_time: int_gdf stores, processes time data within the loop and returns final gdf. (df_int, df_temp, df_min and nodes_distance in previous code versions)
            nodes_time = nodes.copy()
            
            # --------------- 2.3 PROCESSING DISTANCE
            print (f"Starting time analysis for {poi_name}.")
            # List of columns with output data by batch
            time_cols = []
            poiscount_cols = []
            
            # If possible, analyses by batches of 200 pois.
            if len(nearest) % 250:
                batch_size = len(nearest)/200
                for k in range(int(batch_size)+1):
                    print(f"Starting range k = {k+1} of {int(batch_size)+1} for {poi_name}.")
                    # Calculate
                    source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
                    nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght='time_min',count_pois=count_pois)
                    
                    # Extract from nodes_distance_prep the calculated time data
                    batch_time_col = 'time_'+str(k)+poi_name
                    time_cols.append(batch_time_col)
                    nodes_time[batch_time_col] = nodes_distance_prep['dist_'+poi_name]
                    
                    # If requested, extract from nodes_distance_prep the calculated pois count
                    if count_pois[0]:
                        batch_poiscount_col = f'{poi_name}_{str(k)}_{count_pois[1]}min'
                        poiscount_cols.append(batch_poiscount_col)
                        nodes_time[batch_poiscount_col] = nodes_distance_prep[f'{poi_name}_{count_pois[1]}min']
                
                # After batch processing is over, find final output values for all batches.
                # For time data, apply the min function to time columns.
                nodes_time['time_'+poi_name] = nodes_time[time_cols].min(axis=1)
                # If requested, apply the sum function to pois_count columns. 
                if count_pois[0]:
                    # Sum pois count
                    nodes_time[f'{poi_name}_{count_pois[1]}min'] = nodes_time[poiscount_cols].sum(axis=1)
    		
    		# Else, analyses by batches of 250 pois.
            else:
                batch_size = len(nearest)/250
                for k in range(int(batch_size)+1):
                    print(f"Starting range k = {k+1} of {int(batch_size)+1} for source {poi_name}.")
                    # Calculate
                    source_process = nearest.iloc[int(250*k):int(250*(1+k))].copy()
                    nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges, poi_name, 'osmid', wght='time_min',count_pois=count_pois)
                    
                    # Extract from nodes_distance_prep the calculated time data
                    batch_time_col = 'time_'+str(k)+poi_name
                    time_cols.append(batch_time_col)
                    nodes_time[batch_time_col] = nodes_distance_prep['dist_'+poi_name]
                    
                    # If requested, extract from nodes_distance_prep the calculated pois count
                    if count_pois[0]:
                        batch_poiscount_col = f'{poi_name}_{str(k)}_{count_pois[1]}min'
                        poiscount_cols.append(batch_poiscount_col)
                        nodes_time[batch_poiscount_col] = nodes_distance_prep[f'{poi_name}_{count_pois[1]}min']
                        
                # After batch processing is over, find final output values for all batches.
                # For time data, apply the min function to time columns.
                nodes_time['time_'+poi_name] = nodes_time[time_cols].min(axis=1)
                # If requested, apply the sum function to pois_count columns. 
                if count_pois[0]:
                    # Sum pois count
                    nodes_time[f'{poi_name}_{count_pois[1]}min'] = nodes_time[poiscount_cols].sum(axis=1)
            
            print(f"Finished time analysis for {poi_name}.")

            ##########################################################################################
            # Step 3: FINAL FORMAT. Organices and filters output data.
            nodes_time.reset_index(inplace=True)
            nodes_time = nodes_time.set_crs("EPSG:4326")
            if count_pois[0]:
                nodes_time = nodes_time[['osmid','time_'+poi_name,f'{poi_name}_{count_pois[1]}min','x','y','geometry']]
                #return nodes_time
            else:
                nodes_time = nodes_time[['osmid','time_'+poi_name,'x','y','geometry']]		
                #return nodes_time
        
        ####################################################################################################################################################################################
        # FINISHES FUNCTION aup.pois_time
        source_nodes_time = nodes_time.copy()
        ####################################################################################################################################################################################

        source_nodes_time.rename(columns={'time_'+source:source},inplace=True)
        if count_pois[0]:
            source_nodes_time = source_nodes_time[['osmid',source,count_col,'x','y','geometry']]
        else:
            source_nodes_time = source_nodes_time[['osmid',source,'x','y','geometry']]

        # ANALYSIS - Merge all times in one df
        if i == 0: # For the first analysed source
            nodes_analysis = source_nodes_time.copy()
        else: # For the rest
            if count_pois[0]:
                nodes_analysis = pd.merge(nodes_analysis,source_nodes_time[['osmid',source,count_col]],on='osmid')
            else:
                nodes_analysis = pd.merge(nodes_analysis,source_nodes_time[['osmid',source]],on='osmid')

        i = i+1

        print(f"--- FINISHED source {source}. Mean city time = {nodes_analysis[source].mean()}")
        
# Final format for nodes
column_order = ['osmid'] + analysis_cols # Removed x,y and geometry from output to merge later with nodes_analysis_1
nodes_analysis_2 = nodes_analysis[column_order]

print(f"""FINISHED source pois proximity to nodes analysis for {city}.""")

# Show
print(nodes_analysis_2.shape)
nodes_analysis_2.head(1)

--- Loading source pois code 9321 from db.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


--- Loaded and filtered source pois code 9321 from db.
--- 57635 odc_parques pois. Analysing source pois proximity to nodes.
Found and assigned nearest node osmid to each odc_parques.
Starting time analysis for odc_parques.
Starting range k = 1 of 56 for odc_parques.
Starting range k = 2 of 56 for odc_parques.
Starting range k = 3 of 56 for odc_parques.
Starting range k = 4 of 56 for odc_parques.
Starting range k = 5 of 56 for odc_parques.
Starting range k = 6 of 56 for odc_parques.
Starting range k = 7 of 56 for odc_parques.
Starting range k = 8 of 56 for odc_parques.
Starting range k = 9 of 56 for odc_parques.
Starting range k = 10 of 56 for odc_parques.
Starting range k = 11 of 56 for odc_parques.
Starting range k = 12 of 56 for odc_parques.
Starting range k = 13 of 56 for odc_parques.
Starting range k = 14 of 56 for odc_parques.
Starting range k = 15 of 56 for odc_parques.
Starting range k = 16 of 56 for odc_parques.
Starting range k = 17 of 56 for odc_parques.
Starting range k = 1

  super().__setitem__(key, value)
  super().__setitem__(key, value)


Starting range k = 51 of 56 for odc_parques.


  super().__setitem__(key, value)
  super().__setitem__(key, value)


Starting range k = 52 of 56 for odc_parques.


  super().__setitem__(key, value)
  super().__setitem__(key, value)


Starting range k = 53 of 56 for odc_parques.


  super().__setitem__(key, value)
  super().__setitem__(key, value)


Starting range k = 54 of 56 for odc_parques.


  super().__setitem__(key, value)
  super().__setitem__(key, value)


Starting range k = 55 of 56 for odc_parques.


  super().__setitem__(key, value)
  super().__setitem__(key, value)


Starting range k = 56 of 56 for odc_parques.


  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  super().__setitem__(key, value)
  nodes_time.reset_index(inplace=True)


Finished time analysis for odc_parques.
--- FINISHED source odc_parques. Mean city time = 21.967618044636573
FINISHED source pois proximity to nodes analysis for Guadalajara.
(184338, 3)


Unnamed: 0,osmid,odc_parques,odc_parques_15min
0,28751344,5.714179,29.0


### Part 1.3c - Merge both nodes analysis

In [10]:
nodes_analysis = pd.merge(nodes_analysis_1,nodes_analysis_2,on='osmid')

# Show
print(nodes_analysis.shape)
nodes_analysis.head(2)

(184338, 30)


Unnamed: 0,osmid,denue_guarderias,denue_guarderias_15min,denue_preescolar,denue_preescolar_15min,denue_primaria,denue_primaria_15min,denue_secundaria,denue_secundaria_15min,clues_primer_nivel,...,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_bancos,denue_bancos_15min,x,y,geometry,odc_parques,odc_parques_15min
0,28751344,13.128475,1.0,1.80726,5.0,7.275387,8.0,3.88002,2.0,7.275387,...,0.0,15.68526,0.0,12.983204,2.0,-103.306309,20.711533,POINT (-103.30631 20.71153),5.714179,29.0
1,28753224,5.504485,2.0,5.731417,12.0,7.703541,10.0,16.288914,0.0,11.194589,...,0.0,12.200104,1.0,5.69019,5.0,-103.316645,20.70065,POINT (-103.31665 20.70065),9.018499,11.0


## Part 2

In [11]:
############################################################### PART 2 ###############################################################
######################################################### AMENITIES ANALYSIS #########################################################
######################################################### (PREV. SCRIPT 15) ##########################################################

# 2.0 --------------- DEFINITIONS DICTIONARY
# ------------------- On script 15 a dictionary (idx_15_min) is used to calculate the times to amenities.
# ------------------- This step creates the definitions dicc out of the main parameters dicc.

definitions = {}
for eje in parameters.keys():
    # tmp_dicc is {amenity:[source_list]} for each eje
    tmp_dicc = {}
    for amenity in parameters[eje]:
        items_lst = []
        items = list(parameters[eje][amenity].items())
        for item in items:
            items_lst.append(item[0])
        tmp_dicc[amenity] = items_lst
    # Each eje gets assigned its own tmp_dicc
    definitions[eje] = tmp_dicc

In [12]:
# 2.1 --------------- FILL FOR MISSING AMENITIES
# ------------------- This step originates on script 15, where each cities nodes time data was loaded from db.
# ------------------- Even though its no longer needed, it remains usefull for avoiding crashes.
# ------------------- Definitions dicc (Previously, on script 15, called idx_15_min dictionary) is also used in the next steps.

all_sources = []
# Gather all possible sources
for eje in definitions.keys():
    for amenity in definitions[eje].values():
        for source in amenity:
            all_sources.append(source)

# If source not in currently analized city, fill column with np.nan
column_list = list(nodes_analysis.columns)
missing_sourceamenities = []
for s in all_sources:
        if s not in column_list:
            nodes_analysis[s] = np.nan
            print(f"--- {s} source amenity is not present in {city}.")
            missing_sourceamenities.append(s)
print(f"--- Finished missing source amenities analysis. {len(missing_sourceamenities)} not present source amenities were added as np.nan columns.")

--- Finished missing source amenities analysis. 0 not present source amenities were added as np.nan columns.


In [15]:
####################################################################################################################################
# ADAPTATION
# Changed prefix 'max_' for 'time_' in amenities
# Added 'min_' to ejes
# Removed 'two-method'
####################################################################################################################################

# 2.2a -------------- AMENITIES ANALYSIS (amenities, ejes and max_time calculation)
# ------------------- This step calculates times by amenity (preescolar/primaria/etc) using the previously created 
# ------------------- definitions dictionary (Previously, on script 15, called idx_15_min dictionary)
# ------------------- and using weights dictionary to decide which time to use (min/max/other)

print("--- Starting proximity to amenities analysis by node.")

column_max_all = [] # list with all max times column names
column_max_ejes = [] # list with ejes max times column names

#Goes through each eje in dictionary:
for e in definitions.keys():

    #Appends to lists currently examined eje
    column_max_all.append('max_'+ e.lower())
    column_max_all.append('min_'+ e.lower())
    column_max_ejes.append('max_'+ e.lower())
    column_max_ejes.append('min_'+ e.lower())
    column_max_amenities = [] # list with amenities in current eje

    #Goes through each amenity of current eje:
    for a in definitions[e].keys():

        #Appends to lists currently examined amenity:
        column_max_all.append('time_'+ a.lower())
        column_max_amenities.append('time_'+ a.lower())

        #Calculates time to currently examined amenity:
        #Uses source_weight dictionary to decide which time to use.
        weight = source_weight[e][a]
        if weight == 'min': # To know distance to closest source amenity.
                            # If it doesn't matter which one is closest (e.g. Alimentos).
            nodes_analysis['time_'+ a.lower()] = nodes_analysis[definitions[e][a]].min(axis=1)
            
            print(f'For amenity {a} found minimum time between {definitions[e][a]}.')

        elif weight == 'max': # To know distance to farthest source amenity.
                              # If need to know proximity to all of the options (e.g. Social)
            nodes_analysis['time_'+ a.lower()] = nodes_analysis[definitions[e][a]].max(axis=1)
            
            print(f'For amenity {a} found maximum time between {definitions[e][a]}.')
        else:
            # Crash on purpose and raise error
            print("--- Error in source_weight dicc.")
            print("--- Must pass 'min', 'max' or 'two-method'")
            intended_crash

    #Calculates time to currently examined eje (max time of its amenities):
    nodes_analysis['max_'+ e.lower()] = nodes_analysis[column_max_amenities].max(axis=1)
    nodes_analysis['min_'+ e.lower()] = nodes_analysis[column_max_amenities].min(axis=1)

# Set and calculate max time
index_column = 'max_time' # column name for maximum time data
column_max_all.append(index_column) #Adds to column_max_all list the attribute 'max_time'
nodes_analysis[index_column] = nodes_analysis[column_max_ejes].max(axis=1) #Assigns "max_time" the max time for all ejes   

# Add to column_max_all list the attributes 'osmid' and 'geometry' to filter nodes_analysis.
# Looking for data of importance: columns in column_max_all list
column_max_all.append('osmid')
column_max_all.append('geometry')
nodes_timeanalysis_filter = nodes_analysis[column_max_all].copy()
    
print("--- Calculated proximity to amenities data by node.")

# Show
print(nodes_timeanalysis_filter.shape)
nodes_timeanalysis_filter.head(2)

--- Starting proximity to amenities analysis by node.
For amenity Guarderías found minimum time between ['denue_guarderias'].
For amenity Preescolar found minimum time between ['denue_preescolar'].
For amenity Primaria found minimum time between ['denue_primaria'].
For amenity Secundaria found minimum time between ['denue_secundaria'].
For amenity Primer nivel found minimum time between ['clues_primer_nivel'].
For amenity Farmacias found minimum time between ['denue_farmacias'].
For amenity Area verde found minimum time between ['odc_parques'].
For amenity Canchas found minimum time between ['sip_cancha'].
For amenity Unidad deportiva found minimum time between ['sip_unidad_deportiva'].
For amenity Cines found minimum time between ['denue_cines'].
For amenity Museos found minimum time between ['denue_museos'].
For amenity Bibliotecas found minimum time between ['denue_bibliotecas'].
For amenity Bancos found minimum time between ['denue_bancos'].
--- Calculated proximity to amenities da

Unnamed: 0,max_escuelas,min_escuelas,time_guarderías,time_preescolar,time_primaria,time_secundaria,max_salud,min_salud,time_primer nivel,time_farmacias,...,min_cultural,time_cines,time_museos,time_bibliotecas,max_financiero,min_financiero,time_bancos,max_time,osmid,geometry
0,13.128475,1.80726,13.128475,1.80726,7.275387,3.88002,7.275387,4.255024,7.275387,4.255024,...,15.68526,31.402184,84.86144,15.68526,12.983204,12.983204,12.983204,84.86144,28751344,POINT (-103.30631 20.71153)
1,16.288914,5.504485,5.504485,5.731417,7.703541,16.288914,11.194589,2.455262,11.194589,2.455262,...,12.200104,40.056989,60.503516,12.200104,5.69019,5.69019,5.69019,60.503516,28753224,POINT (-103.31665 20.70065)


In [16]:
# 2.2b -------------- AMENITIES COUNT ANALYSIS (amenities at given time count, optional)
# ------------------- Similar to previous amenities analysis, this step (optional, added later)
# ------------------- calculates how many amenities there are at a given time proximity (count_pois = (Boolean,time))

if count_pois[0]:
    column_count_all = []
    
    # Go through each eje
    for eje in definitions.keys():
        # Name of count eje
        eje_count_colname = f'{eje}_{count_pois[1]}min'.lower()
        # Append to lists
        column_count_all.append(eje_count_colname)
    
        # Go through eje's amenities
        column_count_amenities = []
        for amenity in definitions[eje]:
            # Name of count amenity
            amenity_count_colname = f'{amenity}_{count_pois[1]}min'.lower()
            # Append to lists
            column_count_all.append(amenity_count_colname)
            column_count_amenities.append(amenity_count_colname)
    
            # Gather amenities sources
            column_count_sources = [] # Just used for sum function, not added at final output
            for source in definitions[eje][amenity]:
                # Add to sources list
                source_count_colname = f'{source}_{count_pois[1]}min'
                column_count_sources.append(source_count_colname)
            # Find sum of all sources found within given time of each node (For current amenity)
            nodes_analysis[amenity_count_colname] = nodes_analysis[column_count_sources].sum(axis=1)
    
        # Find sum of all sources found within given time of each node (For current eje)
        nodes_analysis[eje_count_colname] = nodes_analysis[column_count_amenities].sum(axis=1)
    
    # Filter for columns of interest
    column_count_all.append('osmid')
    nodes_countanalysis_filter = nodes_analysis[column_count_all]
    nodes_analysis_filter = pd.merge(nodes_timeanalysis_filter,nodes_countanalysis_filter,on='osmid')

else:
    nodes_analysis_filter = nodes_timeanalysis_filter.copy()

# Show
print(nodes_analysis_filter.shape)
nodes_analysis_filter.head(2)

(184338, 47)


Unnamed: 0,max_escuelas,min_escuelas,time_guarderías,time_preescolar,time_primaria,time_secundaria,max_salud,min_salud,time_primer nivel,time_farmacias,...,area verde_15min,equipamiento deportivo_15min,canchas_15min,unidad deportiva_15min,cultural_15min,cines_15min,museos_15min,bibliotecas_15min,financiero_15min,bancos_15min
0,13.128475,1.80726,13.128475,1.80726,7.275387,3.88002,7.275387,4.255024,7.275387,4.255024,...,29.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
1,16.288914,5.504485,5.504485,5.731417,7.703541,16.288914,11.194589,2.455262,11.194589,2.455262,...,11.0,2.0,1.0,1.0,1.0,0.0,0.0,1.0,5.0,5.0


In [18]:
####################################################################################################################################
# ADAPTATION
# Using hex_censo_mza_res9, does not need to calculate dens_pob_ha, it's already there.
####################################################################################################################################

# 2.3 --------------- POPULATION DATA
# ------------------- This step (optional) loads hexagons with population data.

if pop_output:
    hex_socio_gdf = gpd.GeoDataFrame()
    # Downloads hex_socio_gdf for city area
    for res in res_list:
        # Download
        hex_pop_res = aup.gdf_from_polygon(aoi, pop_schema, pop_table, geom_col="geometry")
        hex_pop_res = hex_pop_res.set_crs("EPSG:4326")
        print(f"--- Downloaded pop gdf res {res}.")

        # Format
        hex_pop_res.rename(columns={f'hex_id_{res}':'hex_id'},inplace=True)
        hex_pop_res['res'] = res
        
        # Calculate fields of interest
        #hex_pop_res_tmp = hex_pop_res.to_crs("EPSG:6372")
        #hex_pop_res_tmp['dens_pob_ha'] = hex_pop_res_tmp['pobtot'] / (hex_pop_res_tmp.area / 10000)

        # Merge calculated fields to hex_pop_res gdf
        #hex_pop_res_tmp = hex_pop_res_tmp[['hex_id','dens_pob_ha']]
        #hex_pop_res = pd.merge(hex_pop_res,hex_pop_res_tmp,on='hex_id')

        # Save fields of interest for current res
        pop_fields = ['pobtot','dens_pob_ha']
        hex_socio_gdf = pd.concat([hex_socio_gdf,hex_pop_res[['hex_id','res']+pop_fields+['geometry']]])
        print(f"--- Saved pop gdf res {res}.")

    # Show
    print(hex_socio_gdf.shape)
    print(hex_socio_gdf.head(1))

--- Downloaded pop gdf res 9.
--- Saved pop gdf res 9.
(5667, 5)
            hex_id  res  pobtot  dens_pob_ha  \
0  8949aa252bbffff    9   895.0    72.377587   

                                            geometry  
0  POLYGON ((-103.40480 20.46253, -103.40358 20.4...  


In [19]:
####################################################################################################################################
# ADAPTATION
# Removed code refering to version
####################################################################################################################################

# 2.4 --------------- GROUP DATA BY HEX
# ------------------- This groups nodes data by hexagon.
# ------------------- If pop output, uses previously created hexes. Else, creates hexgrid.

hex_idx = gpd.GeoDataFrame()
for res in res_list:
    # Load or create hexgrid
    # If pop_output is true, loads previously created hexgrid with pop data
    if pop_output:
        # Load hexgrid
        hex_pop = hex_socio_gdf.loc[hex_socio_gdf['res'] == res]
        # Function group_by_hex_mean requires ID to include resolution
        hex_pop.rename(columns={'hex_id':f'hex_id_{res}'},inplace=True)
        # Create hex_tmp (id and geometry)
        hex_pop = hex_pop.to_crs("EPSG:4326")
        hex_tmp = hex_pop[[f'hex_id_{res}','geometry']].copy()
        print(f"--- Loaded pop hexgrid of resolution {res}.")
        
    # If pop_output is false, creates hexgrid
    else:
        # Load hexgrid (which already has ID_res)
        hex_table = f'hexgrid_{res}_city_2020'
        query = f"SELECT * FROM {hex_schema}.{hex_table} WHERE \"city\" LIKE \'{city}\'"
        hexgrid = aup.gdf_from_query(query, geometry_col='geometry')
        # Create hex_tmp
        hex_tmp = hexgrid.set_crs("EPSG:4326")
        hex_tmp = hex_tmp[[f'hex_id_{res}','geometry']].copy()
        print(f"--- Loaded hexgrid of resolution {res}.")
    
    # Group time data by hex
    hex_res_idx = aup.group_by_hex_mean(nodes_analysis_filter, hex_tmp, res, index_column)
    hex_res_idx = hex_res_idx.loc[hex_res_idx[index_column]>0].copy()
    print(f"--- Grouped nodes data by hexagons res {res}.")
    
    # If pop_output is true, add pop data
    if pop_output:
        pop_list = pop_fields.copy()
        pop_list.append(f'hex_id_{res}')
        hex_res_pop = pd.merge(hex_res_idx, hex_pop[pop_list], on=f'hex_id_{res}')
    else:
        hex_res_pop = hex_res_idx.copy()
    
    # After funtion group_by_hex_mean we can remove res from ID and set as a column
    hex_res_pop.rename(columns={f'hex_id_{res}':'hex_id'},inplace=True)
    hex_res_pop['res'] = res

    # Finally, add to hex_idx each resolution processing
    hex_idx = pd.concat([hex_idx,hex_res_pop])
    print(f"--- Saved grouped data by hexagons res {res}.")

# Show
print(hex_idx.shape)
hex_idx.head(1)

--- Loaded pop hexgrid of resolution 9.
--- Grouped nodes data by hexagons res 9.
--- Saved grouped data by hexagons res 9.
(5639, 50)


Unnamed: 0,hex_id,geometry,max_escuelas,min_escuelas,time_guarderías,time_preescolar,time_primaria,time_secundaria,max_salud,min_salud,...,unidad deportiva_15min,cultural_15min,cines_15min,museos_15min,bibliotecas_15min,financiero_15min,bancos_15min,pobtot,dens_pob_ha,res
0,8949aa252bbffff,"POLYGON ((-103.40480 20.46253, -103.40358 20.4...",65.700136,4.316403,65.700136,18.338011,15.153014,5.424581,65.126155,13.229981,...,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,895.0,72.377587,9


In [20]:
############################################################### PART 3 ###############################################################
#################################################### RECALCULATION AND FINAL DATA ####################################################
#################################################### (PREV. SCRIPT 15 + NEW DATA) ####################################################

####################################################################################################################################
# ADAPTATION
# Changed 'max_' prefix to 'time_' prefix for amenities
# Added min_ to ejes re-calculation
####################################################################################################################################

# 3.1 --------------- RE-CALCULATE MAX TIMES BY HEXAGON
# ------------------- This step recalculates max time to each eje  
# ------------------- from max times to calculated amenities 

#Goes (again) through each eje in dictionary:
for e in definitions.keys():
    column_max_amenities = [] # list with amenities in current eje

    #Goes (again) through each amenity of current eje:    
    for a in definitions[e].keys():
        column_max_amenities.append('time_'+ a.lower())
    #Re-calculates time to currently examined eje (min and max time of its amenities):        
    hex_idx['max_'+ e.lower()] = hex_idx[column_max_amenities].max(axis=1)
    hex_idx['min_'+ e.lower()] = hex_idx[column_max_amenities].min(axis=1)

print('--- Finished recalculating ejes times in hexagons.')

# Show
print(hex_idx.shape)
hex_idx.head(1)

--- Finished recalculating ejes times in hexagons.
(5639, 50)


Unnamed: 0,hex_id,geometry,max_escuelas,min_escuelas,time_guarderías,time_preescolar,time_primaria,time_secundaria,max_salud,min_salud,...,unidad deportiva_15min,cultural_15min,cines_15min,museos_15min,bibliotecas_15min,financiero_15min,bancos_15min,pobtot,dens_pob_ha,res
0,8949aa252bbffff,"POLYGON ((-103.40480 20.46253, -103.40358 20.4...",65.700136,5.424581,65.700136,18.338011,15.153014,5.424581,65.126155,13.229981,...,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,895.0,72.377587,9


In [21]:
####################################################################################################################################
# ADAPTATION
# Removed additional data (idx, mean_time, median_time, city)
####################################################################################################################################

# 3.2 --------------- CALCULATE AND ADD ADDITIONAL AND FINAL DATA
# ------------------- This step adds mean, median, city and idx data to each hex

#Define idx function
#def apply_sigmoidal(x):
#    if x == -1:
#        return -1
#    elif x > 1000:
#        return 0
#    else:
#        val = aup.sigmoidal_function(0.1464814753435666, x, 30)
#        return val

# Create all amenities list (previosly we had amenities list by eje) from column_max_ejes
max_amenities_cols = [i for i in column_max_all if i not in column_max_ejes]
max_amenities_cols.remove('max_time')
max_amenities_cols.remove('osmid')
max_amenities_cols.remove('geometry')
# Create list with idx column names
#idx_amenities_cols = []
#for ac in max_amenities_cols:
#    idx_col = ac.replace('max','idx')
#    hex_idx[idx_col] = hex_idx[ac].apply(apply_sigmoidal)
#    idx_amenities_cols.append(idx_col)
# Add final data
#hex_idx[index_column] = hex_idx[column_max_ejes].max(axis=1)
#hex_idx['mean_time'] = hex_idx[max_amenities_cols].mean(axis=1)
#hex_idx['median_time'] = hex_idx[max_amenities_cols].median(axis=1)
#hex_idx['idx_sum'] = hex_idx[idx_amenities_cols].sum(axis=1)
#hex_idx['city'] = city

#print('--- Finished calculating index, mean, median and max time.')

# Show
print(hex_idx.shape)
hex_idx.head(1)

(5639, 50)


Unnamed: 0,hex_id,geometry,max_escuelas,min_escuelas,time_guarderías,time_preescolar,time_primaria,time_secundaria,max_salud,min_salud,...,unidad deportiva_15min,cultural_15min,cines_15min,museos_15min,bibliotecas_15min,financiero_15min,bancos_15min,pobtot,dens_pob_ha,res
0,8949aa252bbffff,"POLYGON ((-103.40480 20.46253, -103.40358 20.4...",65.700136,5.424581,65.700136,18.338011,15.153014,5.424581,65.126155,13.229981,...,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,895.0,72.377587,9


In [22]:
####################################################################################################################################
# ADAPTATION
# Removed additional data (idx, mean_time, median_time, city)
####################################################################################################################################

# 3.3 --------------- FINAL FORMAT
# ------------------- This step gives final format to the gdf

# First elements of ordered column list - ID and geometry
final_column_ordered_list = ['hex_id','res','geometry']

# Second elements of ordered column list - max_ejes and max_amenities 
# removing max_time, osmid and geometry.
column_max_ejes_amenities = column_max_all.copy()
column_max_ejes_amenities.remove('max_time')
column_max_ejes_amenities.remove('osmid')
column_max_ejes_amenities.remove('geometry')
final_column_ordered_list = final_column_ordered_list + column_max_ejes_amenities

# Third elements of ordered column list - count pois columns (if requested)
# removing osmid and geometry.
if count_pois[0]:
    third_elements = column_count_all.copy()
    third_elements.remove("osmid")
    final_column_ordered_list = final_column_ordered_list + third_elements

# Fourth elements of ordered list are listed in idx_amenities_cols
#final_column_ordered_list = final_column_ordered_list + idx_amenities_cols

# Fifth elements of ordered list - Final mean, median, max and idx
#fifth_elements = ['mean_time', 'median_time', 'max_time', 'idx_sum']
#final_column_ordered_list = final_column_ordered_list + fifth_elements

# Sixth elements - If pop is calculated - Pop data
if pop_output:
    final_column_ordered_list = final_column_ordered_list + pop_fields

# Last element - City data
#final_column_ordered_list.append('city')

# Filter/reorder final output    
hex_idx_city = hex_idx[final_column_ordered_list]
    
print('--- Finished final format for gdf.')

# Show
print(hex_idx_city.shape)
hex_idx_city.head(1)

--- Finished final format for gdf.
(5639, 49)


Unnamed: 0,hex_id,res,geometry,max_escuelas,min_escuelas,time_guarderías,time_preescolar,time_primaria,time_secundaria,max_salud,...,canchas_15min,unidad deportiva_15min,cultural_15min,cines_15min,museos_15min,bibliotecas_15min,financiero_15min,bancos_15min,pobtot,dens_pob_ha
0,8949aa252bbffff,9,"POLYGON ((-103.40480 20.46253, -103.40358 20.4...",65.700136,5.424581,65.700136,18.338011,15.153014,5.424581,65.126155,...,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,895.0,72.377587


In [23]:
hex_idx_city.columns

Index(['hex_id', 'res', 'geometry', 'max_escuelas', 'min_escuelas',
       'time_guarderías', 'time_preescolar', 'time_primaria',
       'time_secundaria', 'max_salud', 'min_salud', 'time_primer nivel',
       'time_farmacias', 'max_parques', 'min_parques', 'time_area verde',
       'max_equipamiento deportivo', 'min_equipamiento deportivo',
       'time_canchas', 'time_unidad deportiva', 'max_cultural', 'min_cultural',
       'time_cines', 'time_museos', 'time_bibliotecas', 'max_financiero',
       'min_financiero', 'time_bancos', 'escuelas_15min', 'guarderías_15min',
       'preescolar_15min', 'primaria_15min', 'secundaria_15min', 'salud_15min',
       'primer nivel_15min', 'farmacias_15min', 'parques_15min',
       'area verde_15min', 'equipamiento deportivo_15min', 'canchas_15min',
       'unidad deportiva_15min', 'cultural_15min', 'cines_15min',
       'museos_15min', 'bibliotecas_15min', 'financiero_15min', 'bancos_15min',
       'pobtot', 'dens_pob_ha'],
      dtype='object')

In [25]:
if local_save:
    hex_idx_city.to_file(local_save_dir)