# Proximity analysis

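Increase the resolution of the proximity data from hex res=9 to res=10: node-level proximity values are averaged per hexagon, and hexagons without data are filled by interpolation from neighbouring hexagons.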

__NOTE: This notebook was re-run on 2025/08/21 at res 9 for the Streamlit app. The output table is named f'volvo_proxanalysis_2025_mza_hex{res}'.__

## Import libraries

In [1]:
import geopandas as gpd
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt 

import os
import sys
# Add the directory three levels up to the import path (if it is not there
# already) so that the `aup` package can be imported from it.
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
import aup

## Notebook config

In [2]:
# Hexagon grid resolution: selects the hexgrid table, the hex id column
# (f'hex_id_{res}') and the name of the output table.
res = 9

## Download/load data

### Node proximity data

In [3]:
# Database location of the node-level proximity table.
schema = 'projects_research'
table = 'volvo_proxanalysis_2024_nodes'

# Load the table through the project's database helper and preview it.
nodes = aup.gdf_from_db(table, schema)
print(nodes.shape)
nodes.head(2)

(184338, 30)


Unnamed: 0,osmid,denue_guarderias,denue_guarderias_15min,denue_preescolar,denue_preescolar_15min,denue_primaria,denue_primaria_15min,denue_secundaria,denue_secundaria_15min,clues_primer_nivel,...,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_bancos,denue_bancos_15min,x,y,odc_parques,odc_parques_15min,geometry
0,28751344,13.128475,1.0,1.80726,5.0,7.275387,8.0,3.88002,2.0,7.275387,...,0.0,15.68526,0.0,12.983204,2.0,-103.306309,20.711533,5.763263,6,POINT (-103.30631 20.71153)
1,28753224,5.504485,2.0,5.731417,12.0,7.703541,10.0,16.288914,0.0,11.194589,...,0.0,12.200104,1.0,5.69019,5.0,-103.316645,20.70065,9.889999,2,POINT (-103.31665 20.70065)


In [4]:
# List the available columns; the proximity columns aggregated later are chosen from these.
nodes.columns

Index(['osmid', 'denue_guarderias', 'denue_guarderias_15min',
       'denue_preescolar', 'denue_preescolar_15min', 'denue_primaria',
       'denue_primaria_15min', 'denue_secundaria', 'denue_secundaria_15min',
       'clues_primer_nivel', 'clues_primer_nivel_15min', 'denue_farmacias',
       'denue_farmacias_15min', 'sip_cancha', 'sip_cancha_15min',
       'sip_unidad_deportiva', 'sip_unidad_deportiva_15min', 'denue_cines',
       'denue_cines_15min', 'denue_museos', 'denue_museos_15min',
       'denue_bibliotecas', 'denue_bibliotecas_15min', 'denue_bancos',
       'denue_bancos_15min', 'x', 'y', 'odc_parques', 'odc_parques_15min',
       'geometry'],
      dtype='object')

### Hexgrid

In [5]:
# Hexgrid table for the configured resolution.
schema = 'hexgrid'
table = f'hexgrid_{res}_city_2020'

# Keep only the hexagons flagged as urban for the city of Guadalajara.
# A triple-quoted literal avoids escaping the SQL quotes.
query = f"""SELECT * FROM {schema}.{table} WHERE "city" = 'Guadalajara' and "type" = 'urban' """

hex_gdf = aup.gdf_from_query(query)
print(hex_gdf.shape)
hex_gdf.head(2)

(6540, 6)


Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type
0,89498c96e0bffff,"POLYGON ((-103.35267 20.66765, -103.35145 20.6...",14039,Guadalajara,Guadalajara,urban
1,89498c960a3ffff,"POLYGON ((-103.34713 20.62771, -103.34591 20.6...",14039,Guadalajara,Guadalajara,urban


## Data to hex

### Average by hexagon

In [6]:
# Proximity columns to aggregate per hexagon. Every amenity contributes two
# columns, in this order: the base column and its '<base>_15min' companion.
prox_cols = [
    col
    for base in (
        'denue_guarderias',
        'denue_preescolar',
        'denue_primaria',
        'denue_secundaria',
        'clues_primer_nivel',
        'denue_farmacias',
        'sip_cancha',
        'sip_unidad_deportiva',
        'denue_cines',
        'denue_museos',
        'denue_bibliotecas',
        'denue_bancos',
        'odc_parques',
    )
    for col in (base, f'{base}_15min')
]

In [7]:
# Aggregate the node-level proximity columns onto the hexagons, keyed by the hex id column.
# The helper's name suggests a per-hexagon mean — confirm against aup.group_by_hex_mean.
hex_prox = aup.group_by_hex_mean(nodes, hex_gdf, prox_cols, f'hex_id_{res}')
print(hex_prox.shape)
hex_prox.head(2)

(6540, 34)


Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type,denue_guarderias,denue_guarderias_15min,denue_preescolar,denue_preescolar_15min,...,denue_museos,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_bancos,denue_bancos_15min,x,y,odc_parques,odc_parques_15min
0,89498c96e0bffff,"POLYGON ((-103.35267 20.66765, -103.35145 20.6...",14039,Guadalajara,Guadalajara,urban,8.663587,2.517241,3.243826,8.931034,...,11.85325,1.844828,6.160919,1.086207,2.161552,44.12069,-103.353999,20.669368,4.682766,10.224138
1,89498c960a3ffff,"POLYGON ((-103.34713 20.62771, -103.34591 20.6...",14039,Guadalajara,Guadalajara,urban,10.006852,1.0,10.159873,2.628571,...,59.815642,0.0,43.583564,0.0,1.114521,12.342857,-103.348465,20.62886,5.613895,3.885714


In [8]:
# Keep only hexagons whose aggregated denue_preescolar value is non-zero; the
# hexagons dropped here are the ones later treated as missing and interpolated.
# NOTE(review): this uses a single column as the "no data" marker — presumably
# the aggregation helper fills hexagons without nodes with 0 in every column;
# TODO confirm, since a genuine 0 in this column would also be dropped.
hex_prox = hex_prox.loc[hex_prox.denue_preescolar!=0].copy()
print(hex_prox.shape)
hex_prox.head(2)

(6353, 34)


Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type,denue_guarderias,denue_guarderias_15min,denue_preescolar,denue_preescolar_15min,...,denue_museos,denue_museos_15min,denue_bibliotecas,denue_bibliotecas_15min,denue_bancos,denue_bancos_15min,x,y,odc_parques,odc_parques_15min
0,89498c96e0bffff,"POLYGON ((-103.35267 20.66765, -103.35145 20.6...",14039,Guadalajara,Guadalajara,urban,8.663587,2.517241,3.243826,8.931034,...,11.85325,1.844828,6.160919,1.086207,2.161552,44.12069,-103.353999,20.669368,4.682766,10.224138
1,89498c960a3ffff,"POLYGON ((-103.34713 20.62771, -103.34591 20.6...",14039,Guadalajara,Guadalajara,urban,10.006852,1.0,10.159873,2.628571,...,59.815642,0.0,43.583564,0.0,1.114521,12.342857,-103.348465,20.62886,5.613895,3.885714


Rename columns

In [9]:
# Build the output column names, one per entry of prox_cols and in the same order:
#   1. drop the leading prefix up to the first underscore (e.g. 'denue_', 'clues_');
#   2. columns ending in '_15min' keep that short name, all others get a 'time_' prefix.
prox_rename = [
    short if short.endswith('_15min') else 'time_' + short
    for short in (col.split('_', 1)[-1] for col in prox_cols)
]
prox_rename

['time_guarderias',
 'guarderias_15min',
 'time_preescolar',
 'preescolar_15min',
 'time_primaria',
 'primaria_15min',
 'time_secundaria',
 'secundaria_15min',
 'time_primer_nivel',
 'primer_nivel_15min',
 'time_farmacias',
 'farmacias_15min',
 'time_cancha',
 'cancha_15min',
 'time_unidad_deportiva',
 'unidad_deportiva_15min',
 'time_cines',
 'cines_15min',
 'time_museos',
 'museos_15min',
 'time_bibliotecas',
 'bibliotecas_15min',
 'time_bancos',
 'bancos_15min',
 'time_parques',
 'parques_15min']

In [10]:
# Map each original column name to its new name (both lists share the same order)
# and apply the mapping to the hexagon table.
prox_dict = dict(zip(prox_cols, prox_rename))
hex_prox = hex_prox.rename(columns=prox_dict)
hex_prox.head(2)

Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type,time_guarderias,guarderias_15min,time_preescolar,preescolar_15min,...,time_museos,museos_15min,time_bibliotecas,bibliotecas_15min,time_bancos,bancos_15min,x,y,time_parques,parques_15min
0,89498c96e0bffff,"POLYGON ((-103.35267 20.66765, -103.35145 20.6...",14039,Guadalajara,Guadalajara,urban,8.663587,2.517241,3.243826,8.931034,...,11.85325,1.844828,6.160919,1.086207,2.161552,44.12069,-103.353999,20.669368,4.682766,10.224138
1,89498c960a3ffff,"POLYGON ((-103.34713 20.62771, -103.34591 20.6...",14039,Guadalajara,Guadalajara,urban,10.006852,1.0,10.159873,2.628571,...,59.815642,0.0,43.583564,0.0,1.114521,12.342857,-103.348465,20.62886,5.613895,3.885714


## Interpolate data

Extract hexagons with missing data

In [12]:
# Hexagons of the full grid whose id is not present in hex_prox, i.e. the ones
# that need interpolated values.
# .copy() makes this an independent frame (not a slice of hex_gdf), so that
# columns can be assigned to it later without touching or warning about hex_gdf.
hex_missing = hex_gdf.loc[~hex_gdf[f'hex_id_{res}'].isin(hex_prox[f'hex_id_{res}'])].copy()
print(hex_missing.shape)
hex_missing.head(2)

(187, 6)


Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type
250,89498c95d27ffff,"POLYGON ((-103.31335 20.74339, -103.31213 20.7...",14039,Guadalajara,Guadalajara,urban
628,89498c95d1bffff,"POLYGON ((-103.30177 20.73212, -103.30055 20.7...",14039,Guadalajara,Guadalajara,urban


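Reproject to a projected CRS (EPSG:32613, WGS 84 / UTM zone 13N) so that centroids and distances are expressed in metres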

In [13]:
# Reproject both frames to EPSG:32613 before computing centroids for the interpolation.
hex_prox = hex_prox.to_crs("EPSG:32613")
hex_missing = hex_missing.to_crs("EPSG:32613")

Interpolate

In [14]:
# Centroids of the hexagons without data: the locations where values are estimated.
hex_missing_point = hex_missing[[f'hex_id_{res}','geometry']].copy()
hex_missing_point['geometry'] = hex_missing_point.centroid

# Coordinates of the known (source) and missing (target) hexagon centroids are
# the same for every variable, so compute them once instead of on every iteration.
known_centroids = hex_prox.centroid
known_x = known_centroids.x
known_y = known_centroids.y
target_x = hex_missing_point.geometry.x
target_y = hex_missing_point.geometry.y

# Interpolation settings passed to aup.interpolate_at_points.
# NOTE(review): search_radius is presumably expressed in the units of the
# coordinates passed in — confirm against the helper. This value was not changed
# when the notebook was re-run at res 9; check that it still reaches neighbouring
# hexagon centroids at the current resolution, and that the 0.0 values seen for
# interpolated hexagons are real estimates rather than a "no neighbour found" fill.
interp_power = 2
interp_search_radius = 300

for prox in prox_rename:
    int_vals = aup.interpolate_at_points(known_x,
                                         known_y,
                                         hex_prox[prox],
                                         target_x,
                                         target_y,
                                         power=interp_power,
                                         search_radius=interp_search_radius)

    # Store the estimates for this variable on the missing hexagons.
    hex_missing[prox] = int_vals

    print('Finished processing',prox)

print(hex_missing.shape)
hex_missing.head(2)

Finished processing time_guarderias
Finished processing guarderias_15min
Finished processing time_preescolar
Finished processing preescolar_15min
Finished processing time_primaria
Finished processing primaria_15min
Finished processing time_secundaria
Finished processing secundaria_15min
Finished processing time_primer_nivel
Finished processing primer_nivel_15min
Finished processing time_farmacias
Finished processing farmacias_15min
Finished processing time_cancha
Finished processing cancha_15min
Finished processing time_unidad_deportiva
Finished processing unidad_deportiva_15min
Finished processing time_cines
Finished processing cines_15min
Finished processing time_museos
Finished processing museos_15min
Finished processing time_bibliotecas
Finished processing bibliotecas_15min
Finished processing time_bancos
Finished processing bancos_15min
Finished processing time_parques
Finished processing parques_15min
(187, 32)


Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type,time_guarderias,guarderias_15min,time_preescolar,preescolar_15min,...,time_cines,cines_15min,time_museos,museos_15min,time_bibliotecas,bibliotecas_15min,time_bancos,bancos_15min,time_parques,parques_15min
250,89498c95d27ffff,"POLYGON ((675608.497 2294663.635, 675733.734 2...",14039,Guadalajara,Guadalajara,urban,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
628,89498c95d1bffff,"POLYGON ((676827.185 2293429.440, 676952.420 2...",14039,Guadalajara,Guadalajara,urban,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Integrate interpolated data

In [15]:
# Append the interpolated hexagons to the hexagons that already had data.
# Rows of hex_prox whose id is already in hex_missing are dropped first, so that
# re-running this cell replaces the interpolated hexagons instead of appending
# them a second time. On the first run no row matches and nothing is dropped.
already_added = hex_prox[f'hex_id_{res}'].isin(hex_missing[f'hex_id_{res}'])
hex_prox = pd.concat([hex_prox.loc[~already_added],
                      hex_missing])
print(hex_prox.shape)
hex_prox.head(2)

(6540, 34)


Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type,time_guarderias,guarderias_15min,time_preescolar,preescolar_15min,...,time_museos,museos_15min,time_bibliotecas,bibliotecas_15min,time_bancos,bancos_15min,x,y,time_parques,parques_15min
0,89498c96e0bffff,"POLYGON ((671598.887 2286237.521, 671724.122 2...",14039,Guadalajara,Guadalajara,urban,8.663587,2.517241,3.243826,8.931034,...,11.85325,1.844828,6.160919,1.086207,2.161552,44.12069,-103.353999,20.669368,4.682766,10.224138
1,89498c960a3ffff,"POLYGON ((672220.867 2281821.412, 672346.099 2...",14039,Guadalajara,Guadalajara,urban,10.006852,1.0,10.159873,2.628571,...,59.815642,0.0,43.583564,0.0,1.114521,12.342857,-103.348465,20.62886,5.613895,3.885714


## Category processing

In [16]:
# Category -> {amenity: aggregation across data sources}.
# Every amenity has a single data source, so the 'min' setting has no effect.
# The insertion order of categories and amenities is preserved.
source_weight = {
    category: {amenity: 'min' for amenity in amenities}
    for category, amenities in (
        ('Escuelas', ('Guarderias', 'Preescolar', 'Primaria', 'Secundaria')),
        ('Salud', ('Primer_nivel', 'Farmacias')),
        ('Parques', ('Parques',)),
        ('Equipamiento deportivo', ('Cancha', 'Unidad_deportiva')),
        ('Cultural', ('Cines', 'Museos', 'Bibliotecas')),
        ('Financiero', ('Bancos',)),
    )
}

In [17]:
# For each category add two columns to hex_prox:
#   'time_<category>'  = row-wise minimum of the category's 'time_<amenity>' columns
#   '<category>_15min' = row-wise sum of the category's '<amenity>_15min' columns
# Only the keys of source_weight are used; the per-amenity 'min' values are not read here.
# NOTE(review): category names are only lower-cased, so 'Equipamiento deportivo'
# yields column names containing a space — confirm that is intended for the output table.
for category, amenities in source_weight.items():
    slug = category.lower()
    amenity_names = [name.lower() for name in amenities]

    time_columns = [f'time_{name}' for name in amenity_names]
    count_columns = [f'{name}_15min' for name in amenity_names]

    hex_prox[f'time_{slug}'] = hex_prox[time_columns].min(axis=1)
    hex_prox[f'{slug}_15min'] = hex_prox[count_columns].sum(axis=1)

print(hex_prox.shape)
hex_prox.head(2)

(6540, 44)


Unnamed: 0,hex_id_9,geometry,CVEGEO,NOMGEO,city,type,time_guarderias,guarderias_15min,time_preescolar,preescolar_15min,...,time_escuelas,escuelas_15min,time_salud,salud_15min,time_equipamiento deportivo,equipamiento deportivo_15min,time_cultural,cultural_15min,time_financiero,financiero_15min
0,89498c96e0bffff,"POLYGON ((671598.887 2286237.521, 671724.122 2...",14039,Guadalajara,Guadalajara,urban,8.663587,2.517241,3.243826,8.931034,...,2.170799,19.362069,3.585968,7.724138,49.457691,0.0,6.160919,4.931034,2.161552,44.12069
1,89498c960a3ffff,"POLYGON ((672220.867 2281821.412, 672346.099 2...",14039,Guadalajara,Guadalajara,urban,10.006852,1.0,10.159873,2.628571,...,7.272814,7.485714,7.321409,2.657143,15.527184,0.685714,43.583564,0.0,1.114521,12.342857


## Upload data

In [18]:
# Reproject to EPSG:4326 before uploading.
hex_prox = hex_prox.to_crs("EPSG:4326")

In [19]:
# Destination table; the name carries the hexagon resolution used in this run.
table = f'volvo_proxanalysis_2025_mza_hex{res}'
schema = 'projects_research'

# NOTE(review): if_exists='replace' presumably overwrites an existing table with
# the same name — confirm before re-running against a shared database.
aup.gdf_to_db_slow(hex_prox, table, schema, if_exists='replace')