# prox c-01 Script debugs and tests

__Notebooks starting with "prox c-" integrate new processes and methodologies for calculating proximity. Based on the develop of proximity 2024 (Script 21-proximity-analysis-mexico.py)__

Updated on 2024 05 06.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

# Finding script problem [Solved, added set_index to nodes and edges function aup.create_osmnx_network] 

## Step 0: Notebook/Script config

### Config - Base data required

In [4]:
# ------------------------------ BASE DATA REQUIRED ------------------------------
# Name of area of interest (Required)
city = 'Aguascalientes'
# Shape of the area of interest (Required directory)
aoi_dir = "../../../data/external/temporal_todocker/prox_aoi/aoi_ags.gpkg"
# Points of interest (Required directory)
# pois gdf must have a col named 'code' with a unique ID for each type of point of interest.
# This code will be searched in dicc parameters to be assigned to a source-->amenity-->eje.
pois_dir = "../../../data/external/temporal_todocker/prox_aoi/pois_ags.gpkg"

### Config - Analysis and output options

In [5]:
# ---------------------------- SCRIPT CONFIGURATION - ANALYSIS AND OUTPUT OPTIONS ----------------------------
# IMPORTANT NOTE:
# Network distance method used in function pois_time will always be 'length' since
# this notebook creates its own OSMnx Network ('time_min' is the result of pre-processing)
# Therefore, this script assumes pedestrian speed of 4km/hr

# Resolutions of hexgrid output (Required)
res_list = [8,9]
# Count available amenities at given time proximity (minutes)? (Required)
count_pois = (False,15) # Must pass a tupple containing a boolean (True or False) and time proximity of interest in minutes (Boolean,time)
# Save disk space by deleting used data that will not be used after? (Required)
save_space = False

# OPTIONAL 
pop_output = True
# Pop data by block file directory (Required if pop_output = True)
# Pop data is converted to hex data by using centroids.
pop_dir = "../../../data/external/temporal_todocker/prox_aoi/pop_gdf_ags.gpkg"
# List of columns with pop data. with total pop data (Required if pop_output = True)
# First item of list must be name of total population column in order to calculate density.
pop_columns = ['pobtot','pobfem','pobmas']
# Pop gdf index column (Required if pop_output = True)
pop_index_column = 'cvegeo'

### Config - Saving

In [6]:
# ---------------------------- SCRIPT CONFIGURATION - SAVING ----------------------------
# Save final output to database?
db_save = True
save_schema = 'prox_analysis'
nodes_save_table = 'nodesproximity_aoi'
hex_save_table = 'proximityanalysis_aoi'
# Test - (If testing, Script saves it ONLY locally. (Make sure directory exists)
test = True
nodes_local_save_dir = f"../../../data/processed/prox_aoi/test_{city}_script18_nodes.gpkg"
final_local_save_dir = f"../../../data/processed/prox_aoi/test_{city}_script18_hex.gpkg"

### Config - Pois structure

In [7]:
# ---------------------------- SCRIPT CONFIGURATION - POIS STRUCTURE ----------------------------
# PARAMETERS DICTIONARY (Required)
# Set the ejes, amenidades, sources and codes for analysis
        #{Eje (e):
        #            {Amenity (a):
        #                          {Sources (s):
        #                                           [Codes (c)]
        #                           }
        #             }
        #}
parameters = {'Escuelas':{'Preescolar':{'denue_preescolar':[611111, 611112]},
                        'Primaria':{'denue_primaria':[611121, 611122]},
                        'Secundaria':{'denue_secundaria':[611131, 611132]}
                        },
            'Servicios comunitarios':{'Salud':{'clues_primer_nivel':[8610]},
                                    'Guarderías':{'denue_guarderias':[624411, 624412]},
                                    'Asistencia social':{'denue_dif':[931610]}
                                    },
            'Comercio':{'Alimentos':{'denue_supermercado':[462111],
                                    'denue_abarrotes':[461110], 
                                    'denue_carnicerias': [461121, 461122, 461123],
                                    'sip_mercado':[4721]},
                        'Personal':{'denue_peluqueria':[812110]},
                        'Farmacias':{'denue_farmacias':[464111, 464112]},
                        'Hogar':{'denue_ferreteria_tlapaleria':[467111],
                                'denue_art_limpieza':[467115]},
                        'Complementarios':{'denue_ropa':[463211, 463212, 463213, 463215, 463216, 463218],
                                            'denue_calzado':[463310], 
                                            'denue_muebles':[466111, 466112, 466113, 466114],
                                            'denue_lavanderia':[812210],
                                            'denue_revistas_periodicos':[465313],
                                            'denue_pintura':[467113]}
                        },
            'Entretenimiento':{'Social':{'denue_restaurante_insitu':[722511, 722512, 722513, 722514, 722519],
                                        'denue_restaurante_llevar':[722516, 722518, 722517],
                                        'denue_bares':[722412],
                                        'denue_cafe':[722515]},
                                'Actividad física':{'sip_cancha':[93110],
                                                    'sip_unidad_deportiva':[93111],
                                                    'sip_espacio_publico':[9321],
                                                    'denue_parque_natural':[712190]},
                                'Cultural':{'denue_cines':[512130],
                                            'denue_museos':[712111, 712112]}
                                } 
            }

# WEIGHT DICTIONARY (Required)
# If need to measure nearest source for amenity, doesn't matter which, choose 'min'
# If need to measure access to all of the different sources in an amenity, choose 'max'
source_weight = {'Escuelas':{'Preescolar':'max', #There is only one source, no effect.
                            'Primaria':'max',  #There is only one source, no effect.
                            'Secundaria':'max'},  #There is only one source, no effect.
                'Servicios comunitarios':{'Salud':'max',  #There is only one source, no effect.
                                        'Guarderías':'max', #There is only one source, no effect.
                                        'Asistencia social':'max'},  #There is only one source, no effect.
                'Comercio':{'Alimentos':'min', # /////////////////////////////////////////////////////// Will choose min time to source because measuring access to nearest food source, doesn't matter which.
                            'Personal':'max', #There is only one source, no effect.
                            'Farmacias':'max', #There is only one source, no effect.
                            'Hogar':'min', # ////////////////////////////////////////////////////////// Will choose min time to source because measuring access to nearest source, doesn't matter which.
                            'Complementarios':'min'}, # /////////////////////////////////////////////// Will choose min time to source because measuring access to nearest source, doesn't matter which.
                'Entretenimiento':{'Social':'max', # ////////////////////////////////////////////////// Will choose max time to source because measuring access to all of them.
                                    'Actividad física':'min', # //////////////////////////////////////// Will choose min time to source because measuring access to nearest source, doesn't matter which.
                                    'Cultural':'min'} # //////////////////////////////////////////////// Will choose min time to source because measuring access to nearest source, doesn't matter which.
                }

### Config - Script run final configs

In [8]:
if test:
    db_save = False
    local_save = True
else:
    db_save = True
    local_save = False

## Main function

### Step 1 - Create OSMnx Network

In [74]:
##########################################################################################
# STEP 1: CREATE OSMNX NETWORK
# ------------------- This step downloads the osmnx network for the area of interest.

# Read area of interest (aoi)
aoi = gpd.read_file(aoi_dir)
aoi = aoi.to_crs("EPSG:4326")
print(f"--- Starting creation of osmnx network.")

# Download osmnx network (G, nodes and edges from bounding box of aoi)
G, nodes, edges = aup.create_osmnx_network(aoi,how='from_bbox')
print(f"--- Finished creating osmnx network.")

--- Starting creation of osmnx network.
Extracted min and max coordinates from the municipality. Polygon N:22.10033, S:21.62227, E-102.06451, W-102.59887.
Created OSMnx graph from bounding box.
Converted OSMnx graph to 60765 nodes and 143092 edges GeoDataFrame.
Filtered columns.
Column: osmid in nodes gdf, has a list in it, the column data was converted to string.
Column: lanes in nodes gdf, has a list in it, the column data was converted to string.
Column: name in nodes gdf, has a list in it, the column data was converted to string.
Column: highway in nodes gdf, has a list in it, the column data was converted to string.
Column: maxspeed in nodes gdf, has a list in it, the column data was converted to string.
Column: ref in nodes gdf, has a list in it, the column data was converted to string.
--- Finished creating osmnx network.


In [75]:
nodes.head(1)

Unnamed: 0,osmid,x,y,street_count,geometry
0,301189389,-102.342212,21.848544,4,POINT (-102.34221 21.84854)


In [76]:
edges.head(1)

Unnamed: 0,osmid,v,u,key,oneway,lanes,name,highway,maxspeed,length,geometry,bridge,ref,junction,tunnel,access,width,service
0,713153965,1408187972,301189389,0,False,2,Calle Constitución,residential,,13.812,"LINESTRING (-102.34221 21.84854, -102.34219 21...",,,,,,,


### Step 2 - Analyse points of interest (to nodes)

#### Getting to pois_time

In [30]:
def pois_time_test(G, nodes, edges, pois, poi_name, prox_measure,count_pois=(False,0)):
    ##########################################################################################
    # STEP 1: NEAREST. 
    # Finds and assigns nearest node OSMID to each point of interest.
       
    # Defines projection for downloaded data
    pois = pois.set_crs("EPSG:4326")
    nodes = nodes.set_crs("EPSG:4326")
    edges = edges.set_crs("EPSG:4326")
    
    # In case there are no amenities of the type in the city, prevents it from crashing if len = 0
    if len(pois) == 0:
        nodes_time = nodes.copy()
    
        # Format
        nodes_time.reset_index(inplace=True)
        nodes_time = nodes_time.set_crs("EPSG:4326")
    
        # As no amenities were found, output columns are set to nan.
        nodes_time['time_'+poi_name] = np.nan # Time is set to np.nan.
        print(f"0 {poi_name} found. Time set to np.nan for all nodes.")
        if count_pois[0]: 
            nodes_time[f'{poi_name}_{count_pois[1]}min'] = np.nan # If requested pois_count, value is set to np.nan.
            print(f"0 {poi_name} found. Pois count set to nan for all nodes.")
            nodes_time = nodes_time[['osmid','time_'+poi_name,f'{poi_name}_{count_pois[1]}min','x','y','geometry']]
            return nodes_time
        else:
            nodes_time = nodes_time[['osmid','time_'+poi_name,'x','y','geometry']]
            return nodes_time
    
    else:
        ### Find nearest osmnx node for each DENUE point.
        nearest = aup.find_nearest(G, nodes, pois, return_distance= True)
        nearest = nearest.set_crs("EPSG:4326")
        print(f"Found and assigned nearest node osmid to each {poi_name}.")
        return nearest

In [14]:
### SIMPLIFICATION
parameters = {'Escuelas':{'Preescolar':{'denue_preescolar':[611111, 611112]}}}

In [31]:
##########################################################################################
# STEP 2: ANALYSE POINTS OF INTEREST
# ------------------- This step analysis times (and count of pois at given time proximity if requested) 
# ------------------- using function aup.pois_time. This step is based on script 21.
# ------------------- Main difference lies in how pois are read.

# Read points of interest (pois)
print(f"--- Loading all points of interest.")
pois = gpd.read_file(pois_dir)
pois = pois[['code','geometry']]
pois = pois.set_crs("EPSG:4326")
print(f"--- Loaded {len(pois)} points of interest.")

print(f"""
------------------------------------------------------------
STARTING source pois proximity to nodes analysis for {city}.""")
# PREP. FOR ANALYSIS
i = 0
# PREP. FOR ANALYSIS - List of columns used to deliver final format of Script part 1
all_analysis_cols = []

# SOURCE LOOP
for eje in parameters.keys():
    for amenity in parameters[eje]:
        for source in parameters[eje][amenity]:
            source_analysis_cols = []

            print(f"""
Analysing source {source}.""")
            
            # 2.1 --------------- SAVE ANALYSIS COLUMN NAMES
            # Source col to lists
            source_analysis_cols.append(source)
            all_analysis_cols.append(source)
            # If counting pois, create and append column 
            # count_col formated example: 'denue_preescolar_15min'
            if count_pois[0]:
                count_col = f'{source}_{count_pois[1]}min'
                source_analysis_cols.append(count_col)
                all_analysis_cols.append(count_col)

            # 2.2 --------------- GET POIS - Select source points of interest 
            # (concats all data corresponding to current source in source_pois)
            source_pois = gpd.GeoDataFrame()
            for code in parameters[eje][amenity][source]:
                code_pois = pois.loc[pois['code'] == code]
                print(f"Added {len(code_pois)} of code {code}.")
                source_pois = pd.concat([source_pois,code_pois])
            print(f"--- {source_pois.shape[0]} {source} pois. Analysing source pois proximity to nodes.")
            
            # 2.3 --------------- SOURCE ANALYSIS
            # Calculate time data from nodes to source
            source_nodes_time = pois_time_test(G, nodes, edges, source_pois, source, prox_measure='length', count_pois=count_pois)
            # Format
            #source_nodes_time.rename(columns={'time_'+source:source},inplace=True)
            #source_nodes_time = source_nodes_time[['osmid']+source_analysis_cols+['x','y','geometry']]

--- Loading all points of interest.
--- Loaded 20792 points of interest.

------------------------------------------------------------
STARTING source pois proximity to nodes analysis for Aguascalientes.

Analysing source denue_preescolar.
Added 113 of code 611111.
Added 193 of code 611112.
--- 306 denue_preescolar pois. Analysing source pois proximity to nodes.
Found and assigned nearest node osmid to each denue_preescolar.


In [34]:
nearest = source_nodes_time.copy()
nearest.head(1)

Unnamed: 0,code,geometry,osmid,distance_node
0,611111,POINT (-102.27464 21.90191),18381,16.377978


#### Accessing calculate_distance_nearest_poi from pois_time

In [51]:
def get_to_calculate_distance_nearest_poi(nodes,edges,nearest,prox_measure='length'):
    poi_name = source
    edges_test = edges.copy() 
    
    edges_test[prox_measure].fillna(edges_test[prox_measure].mean(),inplace=True)
    # If prox_measure = 'length', calculates time_min assuming walking speed = 4km/hr
    if prox_measure == 'length':
        edges_test['time_min'] = (edges_test['length']*60)/4000

    # 2.2 --------------- ELEMENTS NEEDED OUTSIDE THE ANALYSIS LOOP
    # The pois are divided by batches of 200 or 250 pois and analysed using the function calculate_distance_nearest_poi.

    # nodes_analysis is a nodes gdf (index reseted) used in the function aup.calculate_distance_nearest_poi.
    nodes_analysis = nodes.reset_index().copy()
    # nodes_time: int_gdf stores, processes time data within the loop and returns final gdf. (df_int, df_temp, df_min and nodes_distance in previous code versions)
    nodes_time = nodes.copy()

    # --------------- 2.3 PROCESSING DISTANCE
    print (f"Starting time analysis for {poi_name}.")

    # List of columns with output data by batch
    time_cols = []
    poiscount_cols = []

    # If possible, analyses by batches of 200 pois.
    if len(nearest) % 250:
        batch_size = len(nearest)/200
        for k in range(int(batch_size)+1):
            print(f"Starting range k = {k+1} of {int(batch_size)+1} for {poi_name}.")
            # Calculate
            source_process = nearest.iloc[int(200*k):int(200*(1+k))].copy()
            print(f"Process size: {len(source_process)} pois.")
            a = """
            nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges_test, poi_name, 'osmid', wght='time_min',count_pois=count_pois)

            # Extract from nodes_distance_prep the calculated time data
            batch_time_col = 'time_'+str(k)+poi_name
            time_cols.append(batch_time_col)
            nodes_time[batch_time_col] = nodes_distance_prep['dist_'+poi_name]

            # If requested, extract from nodes_distance_prep the calculated pois count
            if count_pois[0]:
                batch_poiscount_col = f'{poi_name}_{str(k)}_{count_pois[1]}min'
                poiscount_cols.append(batch_poiscount_col)
                nodes_time[batch_poiscount_col] = nodes_distance_prep[f'{poi_name}_{count_pois[1]}min']

        # After batch processing is over, find final output values for all batches.
        # For time data, apply the min function to time columns.
        nodes_time['time_'+poi_name] = nodes_time[time_cols].min(axis=1)
        # If requested, apply the sum function to pois_count columns. 
        if count_pois[0]:
            # Sum pois count
            nodes_time[f'{poi_name}_{count_pois[1]}min'] = nodes_time[poiscount_cols].sum(axis=1)"""
    
    # Else, analyses by batches of 250 pois.
    else:
        batch_size = len(nearest)/250
        for k in range(int(batch_size)+1):
            print(f"Starting range k = {k+1} of {int(batch_size)+1} for source {poi_name}.")
            # Calculate
            source_process = nearest.iloc[int(250*k):int(250*(1+k))].copy()
            print(f"Process size: {len(source_process)} pois.")
            a = """
            nodes_distance_prep = aup.calculate_distance_nearest_poi(source_process, nodes_analysis, edges_test, poi_name, 'osmid', wght='time_min',count_pois=count_pois)

            # Extract from nodes_distance_prep the calculated time data
            batch_time_col = 'time_'+str(k)+poi_name
            time_cols.append(batch_time_col)
            nodes_time[batch_time_col] = nodes_distance_prep['dist_'+poi_name]

            # If requested, extract from nodes_distance_prep the calculated pois count
            if count_pois[0]:
                batch_poiscount_col = f'{poi_name}_{str(k)}_{count_pois[1]}min'
                poiscount_cols.append(batch_poiscount_col)
                nodes_time[batch_poiscount_col] = nodes_distance_prep[f'{poi_name}_{count_pois[1]}min']

        # After batch processing is over, find final output values for all batches.
        # For time data, apply the min function to time columns.
        nodes_time['time_'+poi_name] = nodes_time[time_cols].min(axis=1)
        # If requested, apply the sum function to pois_count columns. 
        if count_pois[0]:
            # Sum pois count
            nodes_time[f'{poi_name}_{count_pois[1]}min'] = nodes_time[poiscount_cols].sum(axis=1)

    print(f"Finished time analysis for {poi_name}.")"""

    return edges_test,nodes_analysis, source_process

In [52]:
edges_test,nodes_analysis, source_process = get_to_calculate_distance_nearest_poi(nodes,edges,nearest,prox_measure='length')
source_process.head(1)

Starting time analysis for denue_preescolar.
Starting range k = 1 of 2 for denue_preescolar.
Process size: 200 pois.
Starting range k = 2 of 2 for denue_preescolar.
Process size: 106 pois.


Unnamed: 0,code,geometry,osmid,distance_node
200,611112,POINT (-102.24574 21.88898),24264,15.966903


In [53]:
edges_test.head(1)

Unnamed: 0,osmid,v,u,key,oneway,lanes,name,highway,maxspeed,length,geometry,bridge,ref,junction,tunnel,access,width,service,time_min
0,713153965,1408187972,301189389,0,False,2,Calle Constitución,residential,,13.812,"LINESTRING (-102.34221 21.84854, -102.34219 21...",,,,,,,,0.20718


In [54]:
nodes_analysis.head(1)

Unnamed: 0,index,osmid,x,y,street_count,geometry
0,0,301189389,-102.342212,21.848544,4,POINT (-102.34221 21.84854)


#### Accessing get_seeds from calculate_distance_nearest_poi

In [66]:
# Based on calculate_distance_nearest_poi
def get_to_get_seeds():
    gdf_f = source_process.copy()
    nodes_test2 = nodes_analysis.copy()
    edges_test2 = edges_test.copy()
    amenity_name = source
    column_name = 'osmid'
    wght = 'length'
    get_nearest_poi=(False, 'poi_id_column')
    count_pois = (False,15)
    max_distance=(0,'distance_node')

    # --- Required processing
    nodes = nodes.copy()
    edges = edges.copy()
    if max_distance[0] > 0:
        gdf_f = gdf_f.loc[gdf_f[max_distance[1]]<=max_distance[0]]
    g, weights, node_mapping = to_igraph(nodes,edges,wght=wght) #convert to igraph to run the calculations

    return weights

In [67]:
weights = get_to_get_seeds
weights

<function __main__.get_to_get_seeds()>

In [71]:
aoi_diss = aoi.dissolve()

G_hippo, nodes_hippo, edges_hippo = aup.graph_from_hippo(aoi_diss, schema='osmnx', edges_folder='edges_speed', nodes_folder='nodes')

In [72]:
nodes_hippo.head(1)

Unnamed: 0_level_0,x,y,street_count,geometry
osmid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
272921360,-102.295073,21.872876,3,POINT (-102.29507 21.87288)


In [73]:
edges_hippo.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,oneway,lanes,name,highway,length,geometry,grade,grade_abs,access,tunnel,ref,maxspeed,bridge,junction,service,width,walkspeed,time_min
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3003502781,8424128014,0,296556921,True,1,Carretera Aguascalietes-San Marcos,primary,17.585,"LINESTRING (-102.20343 21.99742, -102.20327 21...",0.0,0.0,,,MEX 25,80,,,,,4.0,0.263775


## Continuation

In [None]:
print(f"""
------------------------------------------------------------
STARTING source pois proximity to nodes analysis for {city}.""")
# PREP. FOR ANALYSIS
i = 0
# PREP. FOR ANALYSIS - List of columns used to deliver final format of Script part 1
all_analysis_cols = []

# SOURCE LOOP
for eje in parameters.keys():
    for amenity in parameters[eje]:
        for source in parameters[eje][amenity]:
            source_analysis_cols = []

            print(f"""
Analysing source {source}.""")
            
            # 2.1 --------------- SAVE ANALYSIS COLUMN NAMES
            # Source col to lists
            source_analysis_cols.append(source)
            all_analysis_cols.append(source)
            # If counting pois, create and append column 
            # count_col formated example: 'denue_preescolar_15min'
            if count_pois[0]:
                count_col = f'{source}_{count_pois[1]}min'
                source_analysis_cols.append(count_col)
                all_analysis_cols.append(count_col)

            # 2.2 --------------- GET POIS - Select source points of interest 
            # (concats all data corresponding to current source in source_pois)
            source_pois = gpd.GeoDataFrame()
            for code in parameters[eje][amenity][source]:
                code_pois = pois.loc[pois['code'] == code]
            source_pois = pd.concat([source_pois,code_pois])
            print(f"--- {source_pois.shape[0]} {source} pois. Analysing source pois proximity to nodes.")
            
            # 2.3 --------------- SOURCE ANALYSIS
            # Calculate time data from nodes to source
            source_nodes_time = aup.pois_time(G, nodes, edges, source_pois, source, prox_measure='length', count_pois=count_pois)
            # Format
            source_nodes_time.rename(columns={'time_'+source:source},inplace=True)
            source_nodes_time = source_nodes_time[['osmid']+source_analysis_cols+['x','y','geometry']]

            # 2.4 --------------- OUTPUT MERGE
            # Merge all sources time data in final output nodes gdf
            if i == 0: # For the first analysed source
                nodes_analysis = source_nodes_time.copy()
            else: # For the following
                nodes_analysis = pd.merge(nodes_analysis,source_nodes_time[['osmid']+source_analysis_cols],on='osmid')

            i = i+1

            print(f"--- FINISHED source {source}. Mean city time = {nodes_analysis[source].mean()}")
        
# 2.5 --------------- Final format for nodes
column_order = ['osmid'] + all_analysis_cols + ['x','y','geometry']
nodes_analysis = nodes_analysis[column_order]

if test:
    nodes_analysis.to_file(nodes_local_save_dir, driver='GPKG')
    print(f"--- Saved {city} nodes gdf locally.")

if save:
    nodes_analysis['city'] = city
    aup.gdf_to_db_slow(nodes_analysis, nodes_save_table, save_schema, if_exists='append')
    print(f"--- Saved {city} nodes gdf in database.")

print(f"""
------------------------------------------------------------
FINISHED source pois proximity to nodes analysis for {city}.""")

# Tests

## Load data

### Load test data

In [2]:
### Test data was created on Script 18 (2024 05 06) using the following configuration: (Only relevant features showed)
#aoi_dir = "../data/external/temporal_todocker/prox_aoi/aoi_ags.gpkg"
#pois_dir = "../data/external/temporal_todocker/prox_aoi/pois_ags.gpkg"
#pop_dir = "../data/external/temporal_todocker/prox_aoi/pop_gdf_ags.gpkg"

# Resolutions of hexgrid output (Required)
#res_list = [8,9]
# Count available amenities at given time proximity (minutes)? (Required)
#count_pois = (False,15) # Must pass a tupple containing a boolean (True or False) and time proximity of interest in minutes (Boolean,time)

#database_network = True
# Database locations
#network_schema = 'osmnx' #(Required if database_network = True)
#nodes_table = 'nodes' #(Required if database_network = True)
#edges_table = 'edges_speed' #(Required if database_network = True)

# Same parameters and weights as established on Script 15

In [3]:
# Location of nodes prox (From Script 18 Aguascalientes test)
nodes_dir = "../../../data/processed/prox_aoi/test_Aguascalientes_script18_nodes.gpkg"
nodes_prox_test = gpd.read_file(nodes_dir)

# Show
print(nodes_prox_test.shape)
print(nodes_prox_test.crs)
nodes_prox_test.head(1)

(51434, 34)
EPSG:4326


Unnamed: 0,osmid,denue_preescolar,denue_primaria,denue_secundaria,clues_primer_nivel,denue_guarderias,denue_dif,denue_supermercado,denue_abarrotes,denue_carnicerias,...,denue_cafe,sip_cancha,sip_unidad_deportiva,sip_espacio_publico,denue_parque_natural,denue_cines,denue_museos,x,y,geometry
0,272921360,10.770629,5.615674,11.812107,6.486561,5.09561,6.486561,3.592175,1.673421,6.486561,...,5.25473,156.885198,28.44305,5.692959,,26.069111,7.589368,-102.295073,21.872876,POINT (-102.29507 21.87288)


In [4]:
# Location of edges prox (From Script 18 Aguascalientes test)
hexs_dir = "../../../data/processed/prox_aoi/test_Aguascalientes_script18_hex.gpkg"
hexs_prox_test = gpd.read_file(hexs_dir)

# Show
print(hexs_prox_test.shape)
print(hexs_prox_test.crs)
hexs_prox_test.head(1)

(1763, 44)
EPSG:4326


Unnamed: 0,hex_id,res,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,max_asistencia social,...,mean_time,median_time,max_time,idx_sum,pobtot,pobfem,pobmas,dens_pob_ha,city,geometry
0,88498e3629fffff,8,15.682623,4.284144,8.876233,15.682623,17.034743,6.388303,7.207663,17.034743,...,8.653304,5.347017,39.802171,12.683419,11349.0,5995.0,5349.0,131.402087,Aguascalientes,"POLYGON ((-102.25098 21.88196, -102.24638 21.8..."


### Load current database data

In [5]:
schema = 'prox_analysis'
table = 'nodes_proximity_2020'
city = 'Aguascalientes'

query = f"SELECT * FROM {schema}.{table} WHERE \"metropolis\" LIKE \'{city}\'"
nodes_prox_db = aup.gdf_from_query(query, geometry_col='geometry')

#Show
print(nodes_prox_db.shape)
nodes_prox_db.head(2)

(2208953, 7)


Unnamed: 0,osmid,x,y,geometry,time,amenity,metropolis
0,272921360,-102.295073,21.872876,POINT (-102.29507 21.87288),10.770629,denue_preescolar,Aguascalientes
1,301191867,-102.274442,21.937135,POINT (-102.27444 21.93714),18.819849,denue_preescolar,Aguascalientes


In [6]:
# Transform original nodes data (rows to columns)
nodes_prox_db_t = nodes_prox_db.drop_duplicates(subset='osmid', keep="last")[['osmid','geometry','metropolis']].copy()
for amenidad in list(nodes_prox_db.amenity.unique()):
    nodes_tmp = nodes_prox_db.loc[nodes_prox_db.amenity == amenidad,['osmid','time']]
    nodes_tmp = nodes_tmp.rename(columns={'time':amenidad})

    if nodes_tmp[amenidad].mean() == 0:
        nodes_tmp[amenidad] = np.nan

    nodes_prox_db_t = nodes_prox_db_t.merge(nodes_tmp, on='osmid')

# Show
print(nodes_prox_db_t.shape)
nodes_prox_db_t.head(1)

(51371, 46)


Unnamed: 0,osmid,geometry,metropolis,denue_preescolar,denue_primaria,denue_secundaria,denue_escuela_mixta,denue_casa_adultos_mayores,denue_guarderias,denue_dif,...,sip_cancha,sip_unidad_deportiva,sip_espacio_publico,sip_mercado,clues_primer_nivel,clues_segundo_nivel,clues_tercer_nivel,sigade_preescolar,sigade_primaria,sigade_secundaria
0,272921360,POINT (-102.29507 21.87288),Aguascalientes,10.770629,5.615674,11.812107,5.25473,9.729402,5.09561,6.486561,...,156.885198,28.44305,5.692959,22.145997,6.486561,6.486561,53.636174,3.327789,3.327789,3.327789


In [7]:
# There are no parques naturales in Aguascalientes
nodes_prox_db_t.denue_parque_natural.unique()

array([nan])

## Comparisons

### Compare nodes proximity

#### Fields to compare (time cols)

In [8]:
# Create amenities list which will be compared by node
amenities_list = list(nodes_prox_test.columns)
amenities_list.remove('osmid')
amenities_list.remove('x')
amenities_list.remove('y')
amenities_list.remove('geometry')
amenities_list

['denue_preescolar',
 'denue_primaria',
 'denue_secundaria',
 'clues_primer_nivel',
 'denue_guarderias',
 'denue_dif',
 'denue_supermercado',
 'denue_abarrotes',
 'denue_carnicerias',
 'sip_mercado',
 'denue_peluqueria',
 'denue_farmacias',
 'denue_ferreteria_tlapaleria',
 'denue_art_limpieza',
 'denue_ropa',
 'denue_calzado',
 'denue_muebles',
 'denue_lavanderia',
 'denue_revistas_periodicos',
 'denue_pintura',
 'denue_restaurante_insitu',
 'denue_restaurante_llevar',
 'denue_bares',
 'denue_cafe',
 'sip_cancha',
 'sip_unidad_deportiva',
 'sip_espacio_publico',
 'denue_parque_natural',
 'denue_cines',
 'denue_museos']

#### Sum of all differences

In [9]:
# Inner merge (to compare same osmids)
compare = nodes_prox_db_t.merge(nodes_prox_test,on='osmid',how='inner')

# Compare old and new amenities
compare_list = []
for amenity in amenities_list:

    old_amenity = f"{amenity}_x"
    new_amenity = f"{amenity}_y"
    compare[f"{amenity}_diff"] = compare[new_amenity] - compare[old_amenity]

    compare_list.append(f"{amenity}_diff")

# Save df with time differences only
compare_diff = compare[compare_list]

# Visualize sum of time differences
summary = pd.DataFrame()
i = 0
for compare_amenity in compare_list:

    diff_value = compare[compare_amenity].sum()
    summary.loc[i,'amenity'] = compare_amenity
    summary.loc[i,'diff'] = diff_value

    i = i+1

# Show
# denue_parque_natural_diff is 0 because there are no parques naturales in Aguascalientes
summary

Unnamed: 0,amenity,diff
0,denue_preescolar_diff,4.831691e-13
1,denue_primaria_diff,4.831691e-13
2,denue_secundaria_diff,6.536993e-13
3,clues_primer_nivel_diff,4.263256e-13
4,denue_guarderias_diff,2.557954e-13
5,denue_dif_diff,0.0
6,denue_supermercado_diff,0.0
7,denue_abarrotes_diff,4.831691e-13
8,denue_carnicerias_diff,2.557954e-13
9,sip_mercado_diff,0.0


#### Size of time difference by node

In [21]:
# How much is the difference between old and new time by node?
# Output table shows the number of nodes with __mins of difference between database data and new (test) data.
differences_to_analyse = [1,0.0000000001,0.000000000000001,0.00000000000000000001,0.0000000000000000000000001]   

nodes_diff_df = pd.DataFrame()

for time in differences_to_analyse:
    i = 0
    for diff_col in compare_list:
        nodes_diff_df.loc[i,'amenity'] = diff_col
        
        nodes_with_positive_diff = compare.loc[(compare[diff_col]>time)].shape[0]
        nodes_with_negative_diff = compare.loc[(compare[diff_col]<-time)].shape[0]

        nodes_diff_df.loc[i,f"{time}mins_diff"] = nodes_with_positive_diff
        nodes_diff_df.loc[i,f"{-time}mins_diff"] = nodes_with_negative_diff

        i = i+1

# There are +51,000 nodes in Aguascalientes
nodes_diff_df

Unnamed: 0,amenity,1mins_diff,-1mins_diff,1e-10mins_diff,-1e-10mins_diff,1e-15mins_diff,-1e-15mins_diff,1e-20mins_diff,-1e-20mins_diff,1e-25mins_diff,-1e-25mins_diff
0,denue_preescolar_diff,0.0,0.0,0.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0
1,denue_primaria_diff,0.0,0.0,0.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0
2,denue_secundaria_diff,0.0,0.0,0.0,0.0,16.0,0.0,16.0,0.0,16.0,0.0
3,clues_primer_nivel_diff,0.0,0.0,0.0,0.0,12.0,0.0,12.0,0.0,12.0,0.0
4,denue_guarderias_diff,0.0,0.0,0.0,0.0,7.0,0.0,7.0,0.0,7.0,0.0
5,denue_dif_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,denue_supermercado_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,denue_abarrotes_diff,0.0,0.0,0.0,0.0,13.0,0.0,13.0,0.0,13.0,0.0
8,denue_carnicerias_diff,0.0,0.0,0.0,0.0,7.0,0.0,7.0,0.0,7.0,0.0
9,sip_mercado_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### __All differences by node are less than 1e10 mins. Success.__

### Compare hexs data

In [29]:
hexs_prox_test = hexs_prox_test.loc[hexs_prox_test.res == 8]

In [22]:
# Version 1 original (Script 01 + 02 + 15)
city = 'Aguascalientes'
prox_schema = 'prox_analysis'
prox_table = 'time_15_min_analysis_hexres8'

query = f"SELECT * FROM {prox_schema}.{prox_table} WHERE \"city\" LIKE \'{city}\'"
hexs_prox_db = aup.gdf_from_query(query, geometry_col='geometry')

hexs_prox_db.rename(columns={'hex_id_8':'hex_id',
                                     'max_idx_15_min':'max_time',
                                     'dens_pobha':'dens_pob_ha'},inplace=True)

# Show
print(hexs_prox_db.shape)
hexs_prox_db.head(1)

(309, 30)


Unnamed: 0,hex_id,geometry,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,max_asistencia social,...,max_time,pobtot,pobfem,pobmas,pob_0a14,pob_15a24,pob_25a59,p_60ymas,dens_pob_ha,city
0,88498e36dbfffff,"POLYGON ((-102.34777 21.92336, -102.34267 21.9...",15.057619,13.586498,15.057619,11.969535,62.167866,53.640209,50.912589,62.167866,...,71.010665,264.7125,131.68126,133.03125,75.9375,50.9625,117.731249,20.081251,3.063433,Aguascalientes


In [23]:
# Create col list which will be compared by hex
col_list = list(hexs_prox_db.columns)
col_list.remove('hex_id')
col_list.remove('geometry')
col_list.remove('pobfem')
col_list.remove('pobmas')
col_list.remove('pob_0a14')
col_list.remove('pob_15a24')
col_list.remove('pob_25a59')
col_list.remove('p_60ymas')
col_list.remove('city')
col_list

['max_escuelas',
 'max_preescolar',
 'max_primaria',
 'max_secundaria',
 'max_servicios comunitarios',
 'max_salud',
 'max_guarderías',
 'max_asistencia social',
 'max_comercio',
 'max_alimentos',
 'max_personal',
 'max_farmacias',
 'max_hogar',
 'max_complementarios',
 'max_entretenimiento',
 'max_social',
 'max_actividad física',
 'max_cultural',
 'max_time',
 'pobtot',
 'dens_pob_ha']

#### Sum of all differences

In [30]:
# Inner merge (to compare same hexs)
compare = hexs_prox_db.merge(hexs_prox_test,on='hex_id',how='inner')

# Compare old and new amenities
compare_list = []
for col in col_list:

    old_col = f"{col}_x"
    new_col = f"{col}_y"
    compare[f"{col}_diff"] = compare[new_col] - compare[old_col]

    compare_list.append(f"{col}_diff")

# Save df with time differences only
compare_diff = compare[compare_list]

# Visualize sum of time differences
summary = pd.DataFrame()
i = 0
for compare_col in compare_list:

    diff_value = compare[compare_col].sum()
    summary.loc[i,'amenity'] = compare_col
    summary.loc[i,'diff'] = diff_value

    i = i+1

# Show
# denue_parque_natural_diff is 0 because there are no parques naturales in Aguascalientes
summary

Unnamed: 0,amenity,diff
0,max_escuelas_diff,-3.552714e-15
1,max_preescolar_diff,-1.776357e-15
2,max_primaria_diff,1.776357e-15
3,max_secundaria_diff,-3.552714e-15
4,max_servicios comunitarios_diff,-1.776357e-15
5,max_salud_diff,3.552714e-15
6,max_guarderías_diff,0.0
7,max_asistencia social_diff,-8.881784e-15
8,max_comercio_diff,-5.329071e-15
9,max_alimentos_diff,-1.776357e-15


#### Size of time difference by hex

In [31]:
# How much is the difference between old and new time by hex?
differences_to_analyse = [1000,100,1,0.000000000001,0.0000000000001,0.00000000000001,0.000000000000001]

hexs_diff_df = pd.DataFrame()

for time in differences_to_analyse:
    i = 0
    for diff_col in compare_list:
        hexs_diff_df.loc[i,'amenity'] = diff_col
        
        hexs_with_positive_diff = compare.loc[(compare[diff_col]>time)].shape[0]
        hexs_with_negative_diff = compare.loc[(compare[diff_col]<-time)].shape[0]

        hexs_diff_df.loc[i,f"{time}mins_diff"] = hexs_with_positive_diff
        hexs_diff_df.loc[i,f"{-time}mins_diff"] = hexs_with_negative_diff

        i = i+1

hexs_diff_df

Unnamed: 0,amenity,1000mins_diff,-1000mins_diff,100mins_diff,-100mins_diff,1mins_diff,-1mins_diff,1e-12mins_diff,-1e-12mins_diff,1e-13mins_diff,-1e-13mins_diff,1e-14mins_diff,-1e-14mins_diff,1e-15mins_diff,-1e-15mins_diff
0,max_escuelas_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,max_preescolar_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,max_primaria_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,max_secundaria_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,max_servicios comunitarios_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,max_salud_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6,max_guarderías_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
7,max_asistencia social_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0
8,max_comercio_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
9,max_alimentos_diff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0


## Investigating differences in pobtot and dens_pob_ha

In [36]:
# From Script 11-pop-block-to-hex:

# Download mun_gdf
city = 'Aguascalientes'
schema = 'metropolis'
folder = 'metro_gdf_2020'
query = f"SELECT * FROM {schema}.{folder} WHERE \"city\" LIKE \'{city}\'"
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')
mun_gdf = mun_gdf.set_crs("EPSG:4326")

# Get CVEGEO list from Municipality
cve_geo_lst = mun_gdf.CVEGEO.unique()
# Query from CVEGEO list
block_schema = 'censo_mza'
block_table = 'censo_mza_2020'

db_pop_gdf = gpd.GeoDataFrame()

for i in range(len(cve_geo_lst)):
    m = cve_geo_lst[i]
    query = f"SELECT * FROM {block_schema}.{block_table} WHERE \"CVEGEO\" LIKE \'{m}%%\'"
    db_pop_gdf = pd.concat([db_pop_gdf,aup.gdf_from_query(query, geometry_col='geometry')])

print(db_pop_gdf.shape)
db_pop_gdf.head(1)

(13024, 239)


Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,geometry,ENTIDAD,...,VPH_TELEF,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC
0,100100010286003,1,1,1,286,3,Urbana,Típica,"POLYGON ((-102.30752 21.90227, -102.30738 21.9...",1,...,,,,,,,,,,


In [39]:
db_pop_gdf = db_pop_gdf[['CVEGEO','POBTOT']]

# Show
print(db_pop_gdf.shape)
db_pop_gdf.head(1)

(13024, 2)


Unnamed: 0,CVEGEO,POBTOT
0,100100010286003,2


In [38]:
# Location of pop data used in Script 18 Aguascalientes test:
pop_dir = "../../../data/external/temporal_todocker/prox_aoi/pop_gdf_ags.gpkg"
pop_test = gpd.read_file(pop_dir)

# Show
print(pop_test.shape)
pop_test.head(1)

(13024, 239)


Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,ENTIDAD,NOM_ENT,...,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC,geometry
0,100100010286011,1,1,1,286,11,Urbana,Típica,1,Aguascalientes,...,14,13,10,6,7,0,0,,0,"POLYGON ((-102.31161 21.90047, -102.31153 21.9..."


In [40]:
pop_test = pop_test[['CVEGEO','POBTOT']]

# Show
print(pop_test.shape)
pop_test.head(1)

(13024, 2)


Unnamed: 0,CVEGEO,POBTOT
0,100100010286011,49


In [41]:
pob_merge = pd.merge(db_pop_gdf,pop_test,on='CVEGEO')

In [43]:
pob_merge['diff'] = pob_merge['POBTOT_x'] - pob_merge['POBTOT_y']
tot_diff = pob_merge['diff'].sum()
tot_diff

0

In [45]:
# From Script 11-pop-block-to-hex:
year = '2020'

#Folder names from database
block_schema = 'censo_mza'
block_folder = f'censo_mza_{year}'
# Iterates over city names for each metropolitan area or capital

block_pop = gpd.GeoDataFrame()
for mun in mun_gdf.CVEGEO.unique():
    query = f"SELECT * FROM {block_schema}.{block_folder} WHERE \"CVEGEO\" LIKE \'{mun}%%\'"
    block_pop = pd.concat([block_pop,aup.gdf_from_query(query, geometry_col='geometry')])

block_pop = block_pop.to_crs("EPSG:4326")

In [46]:
block_pop = block_pop[['CVEGEO','POBTOT']]

# Show
print(block_pop.shape)
block_pop.head(1)

(13024, 2)


Unnamed: 0,CVEGEO,POBTOT
0,100100010286034,0


In [47]:
pob_merge_2 = pd.merge(db_pop_gdf,block_pop,on='CVEGEO')
pob_merge_2['diff'] = pob_merge_2['POBTOT_x'] - pob_merge_2['POBTOT_y']
tot_diff = pob_merge_2['diff'].sum()
tot_diff

0

In [51]:
hexs_prox_db

Unnamed: 0,hex_id,geometry,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,max_asistencia social,...,max_time,pobtot,pobfem,pobmas,pob_0a14,pob_15a24,pob_25a59,p_60ymas,dens_pob_ha,city
0,88498e36dbfffff,"POLYGON ((-102.34777 21.92336, -102.34267 21.9...",15.057619,13.586498,15.057619,11.969535,62.167866,53.640209,50.912589,62.167866,...,71.010665,264.71250,131.681260,133.031250,75.937500,50.962500,117.731249,20.081251,3.063433,Aguascalientes
1,88498e36d5fffff,"POLYGON ((-102.32839 21.92505, -102.32329 21.9...",41.764955,22.511213,23.594408,41.764955,49.597804,49.597804,23.166648,33.483284,...,49.597804,1040.96250,542.348500,498.614000,142.968993,171.018986,497.508140,228.414930,12.047757,Aguascalientes
2,884988d965fffff,"POLYGON ((-102.30350 21.86950, -102.30859 21.8...",9.414826,5.760592,8.909364,9.414826,7.708219,7.708219,6.494321,6.604997,...,18.477344,4793.81450,2521.538300,2272.276400,718.263610,722.956720,2204.958360,1145.526500,55.491630,Aguascalientes
3,88498e36e5fffff,"POLYGON ((-102.28403 21.92106, -102.27894 21.9...",25.733585,6.045731,6.229759,25.733585,12.950800,10.346160,7.569280,12.950800,...,39.031573,7975.77340,4129.789000,3845.984900,1672.289850,1721.849500,4047.386460,528.179440,92.328356,Aguascalientes
4,88498ead33fffff,"POLYGON ((-102.19052 21.81114, -102.19101 21.8...",65.211370,27.264172,28.750990,65.211370,155.210802,37.546756,42.144611,155.210802,...,200.710507,95.46667,48.400000,47.066666,31.933334,16.533333,39.333334,7.666666,1.105764,Aguascalientes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,88498e26e9fffff,"POLYGON ((-102.26995 22.07976, -102.27045 22.0...",36.496634,7.734372,5.377190,36.496634,15.087175,12.791192,10.169352,15.087175,...,273.649453,1882.76590,966.248170,916.517700,453.375886,328.574470,778.879340,321.936200,21.789987,Aguascalientes
305,88498e2659fffff,"POLYGON ((-102.28485 22.03122, -102.27975 22.0...",34.267868,15.506291,15.336270,34.267868,69.115624,69.115624,16.047638,66.022348,...,196.961031,1261.60640,621.377140,640.229500,440.393455,195.688550,584.426086,41.098354,14.601826,Aguascalientes
306,88498e34edfffff,"POLYGON ((-102.25436 21.96826, -102.24977 21.9...",48.081665,46.386499,42.358677,48.081665,90.210029,47.884798,49.712651,90.210029,...,131.103844,626.85187,312.962980,313.888850,189.814817,119.444436,272.685188,44.907406,7.256740,Aguascalientes
307,88498e3439fffff,"POLYGON ((-102.24827 21.95567, -102.24318 21.9...",52.930356,52.930356,52.473884,48.543774,78.667784,45.129058,59.689724,78.667784,...,132.685064,3436.47050,1714.117600,1722.352800,1241.764700,590.000050,1535.293904,69.411766,39.785254,Aguascalientes


In [54]:
test_1 = hexs_prox_db.loc[hexs_prox_db.hex_id == '884988d965fffff']
test_1 = test_1[['hex_id','pobtot','dens_pob_ha']]
test_1

Unnamed: 0,hex_id,pobtot,dens_pob_ha
2,884988d965fffff,4793.8145,55.49163


In [55]:
test_2 = hexs_prox_test.loc[hexs_prox_test.hex_id == '884988d965fffff']
test_2 = test_2[['hex_id','pobtot','dens_pob_ha']]
test_2

Unnamed: 0,hex_id,pobtot,dens_pob_ha
183,884988d965fffff,5307.0,61.432098
