# a-03-comparing_data

> Notebooks starting with __a-__ helped explore a way to __filter cultural/social amenities by nom_estab__ in order to integrate changes into the 2024 proximity analysis.

This notebook loads the original proximity analysis for Monterrey together with two recalculated versions (with and without bibliotecas, i.e. libraries), and compares the resulting times.

## Import libraries

In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the directory three levels above the notebook importable so that the
# project-local `aup` package can be found (presumably the repository root —
# TODO confirm).
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the import lived inside the `if` above it was
# skipped whenever the path was already registered, leaving `aup` undefined.
import aup

  ox.config(


## Load data

In [2]:
# City to analyse; used to filter the database query and to fill the 'city' column of the results.
city = 'Monterrey'
# When True, the population columns ('pobtot', 'dens_pobha') are kept in the final output tables.
pop_output = True
# NOTE(review): not referenced in the visible cells of this notebook — confirm whether it is still needed.
save_space = False

# GeoPackages with the recalculated proximity analysis, one computed with bibliotecas
# and one without them ("sinbibliotecas"). Only 'hex_id', 'max_asistencia social' and
# 'max_cultural' are read from these files further down.
proximityanalysis_conbibliotecas = '../../../data/external/temporal_fromjupyter/04_proximityanalysis_hexres9_twomethod.gpkg'
proximityanalysis_sinbibliotecas = '../../../data/external/temporal_fromjupyter/04_proximityanalysis_hexres9_twomethod_sinbibliotecas.gpkg'

In [3]:
#--------------- PREPARE DATA ---------- SET PARAMETERS
# This step sets the ejes, amenities, sources and codes used by the analysis.
# Structure of each dictionary:
#   {Eje (e): {Amenity (a): {Source (s): [Codes (c)]}}}

# Con bibliotecas (with libraries): the full set of sources.
con_dicc = {
    'Escuelas': {
        'Preescolar': {'denue_preescolar': [611111, 611112]},
        'Primaria': {'denue_primaria': [611121, 611122]},
        'Secundaria': {'denue_secundaria': [611131, 611132]},
    },
    'Servicios comunitarios': {
        'Salud': {'clues_primer_nivel': [8610]},
        'Guarderías': {'denue_guarderias': [624411, 624412]},
        'Asistencia social': {'denue_dif': [931610]},
    },
    'Comercio': {
        'Alimentos': {
            'denue_supermercado': [462111],
            'denue_abarrotes': [461110],
            'denue_carnicerias': [461121, 461122, 461123],
            'sip_mercado': [4721],
        },
        'Personal': {'denue_peluqueria': [812110]},
        'Farmacias': {'denue_farmacias': [464111, 464112]},
        'Hogar': {
            'denue_ferreteria_tlapaleria': [467111],
            'denue_art_limpieza': [467115],
        },
        'Complementarios': {
            'denue_ropa': [463211, 463212, 463213, 463215, 463216, 463218],
            'denue_calzado': [463310],
            'denue_muebles': [466111, 466112, 466113, 466114],
            'denue_lavanderia': [812210],
            'denue_revistas_periodicos': [465313],
            'denue_pintura': [467113],
        },
    },
    'Entretenimiento': {
        'Social': {
            'denue_restaurante_insitu': [722511, 722512, 722513, 722514, 722519],
            'denue_restaurante_llevar': [722516, 722518, 722517],
            'denue_bares': [722412],
            'denue_cafe': [722515],
        },
        'Actividad física': {
            'sip_cancha': [93110],
            'sip_unidad_deportiva': [93111],
            'sip_espacio_publico': [9321],
            'denue_parque_natural': [712190],
        },
        'Cultural': {
            'denue_cines': [512130],
            'denue_museos': [712111, 712112],
            'denue_bibliotecas': [519121, 519122],
            'denue_centrocultural': [711312],
        },
    },
}

# Sin bibliotecas (without libraries): identical to con_dicc except that the
# 'denue_bibliotecas' source is dropped. Deriving it from con_dicc (instead of
# keeping a second hand-written copy) guarantees both variations stay in sync.
# Code lists are copied so the two dictionaries share no mutable objects.
sin_dicc = {
    eje: {
        amenity: {
            source: list(codes)
            for source, codes in sources.items()
            if source != 'denue_bibliotecas'
        }
        for amenity, sources in amenities.items()
    }
    for eje, amenities in con_dicc.items()
}

In [4]:
# Database location of the original proximity analysis table.
schema = 'prox_analysis'
table = 'proximityanalysis_hexres9'

# Select the rows whose "city" matches the configured city name.
# NOTE(review): the doubled %% is presumably collapsed into a single LIKE
# wildcard by the database layer — confirm against aup.gdf_from_query.
query = f"SELECT * FROM {schema}.{table} WHERE \"city\" LIKE \'{city}%%\'"
prox_original = aup.gdf_from_query(query, geometry_col='geometry')

# Use the generic 'hex_id' column name and record the hexagon resolution.
prox_original = prox_original.rename(columns={'hex_id_9':'hex_id'})
prox_original['res'] = 9

# Show
print(prox_original.shape)
prox_original.head(1)

(7425, 42)


Unnamed: 0,hex_id,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,max_asistencia social,max_comercio,...,idx_cultural,mean_time,median_time,max_time,idx_sum,pobtot,dens_pobha,city,geometry,res
0,8948a202b0fffff,7.629481,6.259959,3.48433,7.629481,15.893122,15.893122,7.649913,5.336529,16.045945,...,0.96552,8.014404,6.511771,16.045945,13.358216,563.0,45.916411,Monterrey,"POLYGON ((-100.28445 25.64300, -100.28315 25.6...",9


## Proximity analysis with and without bibliotecas (con y sin bibliotecas)

In [5]:
# Re-runs the final steps of the proximity analysis twice (with and without
# bibliotecas). For each variation the re-computed times to 'asistencia social'
# and 'cultural' are read from the corresponding GeoPackage, merged into the
# original analysis, and the eje times, indexes and summary columns are
# recalculated. Results end up in hex_idx_conbibliotecas / hex_idx_sinbibliotecas.
analysis_variations = ['con_bibliotecas','sin_bibliotecas']

# ---------------------------------------------------------------------------------------------------------------------- LOOP-INVARIANT SETUP
# Amenity columns whose values are replaced by the re-computed ones.
fixed_amenities_cols = ['max_asistencia social', 'max_cultural']

# Columns kept from the original analysis (everything except the fixed amenities).
nonfiltered_list = ['hex_id','res',
                    'max_escuelas','max_preescolar', 'max_primaria','max_secundaria',
                    'max_servicios comunitarios', 'max_salud','max_guarderías',
                    'max_comercio','max_alimentos', 'max_personal', 'max_farmacias', 'max_hogar','max_complementarios',
                    'max_entretenimiento', 'max_social','max_actividad física',
                    'pobtot', 'dens_pobha', 'city', 'geometry']

# Column order after merging, with the fixed amenities placed next to their eje.
merged_column_order = ['hex_id','res',
                       'max_escuelas','max_preescolar', 'max_primaria','max_secundaria',
                       'max_servicios comunitarios', 'max_salud','max_guarderías', 'max_asistencia social',
                       'max_comercio','max_alimentos', 'max_personal', 'max_farmacias', 'max_hogar','max_complementarios',
                       'max_entretenimiento', 'max_social','max_actividad física', 'max_cultural',
                       'pobtot', 'dens_pobha', 'city', 'geometry']

# Entries carried at the end of column_max_all that are neither ejes nor amenities.
non_amenity_cols = ['max_time', 'osmid', 'geometry']


def apply_sigmoidal(x):
    """Convert a single time value into its index value.

    -1 is passed through unchanged and values above 1000 map to 0 (both look
    like sentinel/out-of-range markers — TODO confirm their meaning). Any other
    value is delegated to aup.sigmoidal_function(0.1464814753435666, x, 30);
    the meaning of those two constants is defined by that helper and is not
    visible in this notebook.
    """
    if x == -1:
        return -1
    elif x > 1000:
        return 0
    else:
        return aup.sigmoidal_function(0.1464814753435666, x, 30)
# ---------------------------------------------------------------------------------------------------------------------- LOOP-INVARIANT SETUP

for analysis in analysis_variations:
    # ------------------------------------------------------------------------------------------------------------------ BASE PARAMETERS
    if analysis == 'con_bibliotecas':
        proximityanalysis_save_dir = proximityanalysis_conbibliotecas
        parameters = con_dicc.copy()
    else:
        proximityanalysis_save_dir = proximityanalysis_sinbibliotecas
        parameters = sin_dicc.copy()
    # ------------------------------------------------------------------------------------------------------------------ BASE PARAMETERS

    # ------------------------------------------------------------------------------------------------------------------ MERGE AMENITIES
    # Load the fixed amenities for this variation.
    prox_fixed_amenities = gpd.read_file(proximityanalysis_save_dir)
    prox_fixed_amenities = prox_fixed_amenities[['hex_id'] + fixed_amenities_cols]
    # Merge them into the untouched columns of the original analysis.
    # pd.merge defaults to an inner join, so hexagons missing from either side are dropped.
    prox_fixed = pd.merge(prox_original[nonfiltered_list], prox_fixed_amenities, on='hex_id')
    # Reorder columns
    prox_fixed = prox_fixed[merged_column_order]
    # ------------------------------------------------------------------------------------------------------------------ MERGE AMENITIES

    # ------------------------------------------------------------------------------------------------------------------ PROX ANALYSIS PREVIOUS STEPS NECESSARY DATA
    # definitions keeps only {eje: {amenity: [source names]}} from the parameters dictionary.
    definitions = {
        eje: {amenity: list(sources) for amenity, sources in amenities.items()}
        for eje, amenities in parameters.items()
    }

    # All max-time column names: each eje followed by its amenities, then the trailing non-amenity entries.
    column_max_all = []
    for e, amenities in definitions.items():
        column_max_all.append('max_' + e.lower())
        column_max_all.extend('max_' + a.lower() for a in amenities)
    column_max_all += non_amenity_cols

    # Create hex_idx
    hex_idx = prox_fixed.copy()
    # ------------------------------------------------------------------------------------------------------------------ PROX ANALYSIS PREVIOUS STEPS NECESSARY DATA

    # ------------------------------------------------------------------------------------------------------------------ RE-CALCULATE MAX TIMES BY HEXAGON
    # Time to each eje is the max time over its amenities; max_time is the max over all ejes.
    column_max_ejes = [] # list with ejes column names
    for e, amenities in definitions.items():
        eje_col = 'max_' + e.lower()
        column_max_amenities = ['max_' + a.lower() for a in amenities] # amenities in current eje
        hex_idx[eje_col] = hex_idx[column_max_amenities].max(axis=1)
        column_max_ejes.append(eje_col)
    hex_idx['max_time'] = hex_idx[column_max_ejes].max(axis=1)

    print('Finished recalculating times in hexagons')
    # ------------------------------------------------------------------------------------------------------------------ RE-CALCULATE MAX TIMES BY HEXAGON

    # ------------------------------------------------------------------------------------------------------------------ INDEX, MEDIAN AND MEAN CALCULATION
    # Instead of receiving a list of amenities, the amenity columns are derived from
    # column_max_all by discarding the eje columns and the non-amenity entries.
    max_amenities_cols = [c for c in column_max_all
                          if c not in column_max_ejes and c not in non_amenity_cols]

    idx_amenities_cols = [] # list with idx amenity column names
    for ac in max_amenities_cols:
        idx_col = ac.replace('max','idx')
        hex_idx[idx_col] = hex_idx[ac].apply(apply_sigmoidal)
        idx_amenities_cols.append(idx_col)

    # Add final data (max_time was already computed above from the eje columns).
    hex_idx['mean_time'] = hex_idx[max_amenities_cols].mean(axis=1)
    hex_idx['median_time'] = hex_idx[max_amenities_cols].median(axis=1)
    hex_idx['idx_sum'] = hex_idx[idx_amenities_cols].sum(axis=1)
    hex_idx['city'] = city

    print('Finished calculating index, mean and median time')
    # ------------------------------------------------------------------------------------------------------------------ INDEX, MEDIAN AND MEAN CALCULATION

    # ------------------------------------------------------------------------------------------------------------------ FINAL FORMAT REORDER COLUMNS
    # First elements of ordered list - ID and geometry
    first_elements = ['hex_id','res','geometry']
    # Second elements of ordered list - max_ejes and max_amenities without max_time, osmid and geometry.
    column_max_ejes_amenities = [c for c in column_max_all if c not in non_amenity_cols]
    # Third elements of ordered list are listed in idx_amenities_cols
    # Fourth elements of ordered list - Mean, median, max and idx
    fourth_elements = ['mean_time', 'median_time', 'max_time', 'idx_sum']
    # Fifth elements - Pop data, only included when pop_output is set
    fifth_elements = ['pobtot', 'dens_pobha']
    # Last element - City data
    last_element = ['city']

    final_column_ordered_list = first_elements + column_max_ejes_amenities + idx_amenities_cols + fourth_elements
    if pop_output:
        final_column_ordered_list = final_column_ordered_list + fifth_elements
    final_column_ordered_list = final_column_ordered_list + last_element
    hex_idx_city = hex_idx[final_column_ordered_list]

    print('Finished final format')
    # ------------------------------------------------------------------------------------------------------------------ FINAL FORMAT REORDER COLUMNS

    # Keep one result per variation under the names used by the following cells.
    if analysis == 'con_bibliotecas':
        hex_idx_conbibliotecas = hex_idx_city.copy()
    else:
        hex_idx_sinbibliotecas = hex_idx_city.copy()

Finished recalculating times in hexagons
Finished calculating index, mean and median time
Finished final format
Finished recalculating times in hexagons
Finished calculating index, mean and median time
Finished final format


In [6]:
# Preview of the "con bibliotecas" result: table shape, mean of the
# 'max_cultural' column, and the first row.
print(hex_idx_conbibliotecas.shape)
print(hex_idx_conbibliotecas['max_cultural'].mean())
hex_idx_conbibliotecas.head(1)

(7423, 42)
40.28702438016813


Unnamed: 0,hex_id,res,geometry,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,...,idx_social,idx_actividad física,idx_cultural,mean_time,median_time,max_time,idx_sum,pobtot,dens_pobha,city
0,8948a202b0fffff,9,"POLYGON ((-100.28445 25.64300, -100.28315 25.6...",7.629481,6.259959,3.48433,7.629481,28.147561,15.893122,7.649913,...,0.968852,0.978012,0.282017,11.724365,7.081654,36.379502,12.268405,563.0,45.916411,Monterrey


In [7]:
# Preview of the "sin bibliotecas" result: table shape, mean of the
# 'max_cultural' column, and the first row.
print(hex_idx_sinbibliotecas.shape)
print(hex_idx_sinbibliotecas['max_cultural'].mean())
hex_idx_sinbibliotecas.head(1)

(7423, 42)
52.97764662091525


Unnamed: 0,hex_id,res,geometry,max_escuelas,max_preescolar,max_primaria,max_secundaria,max_servicios comunitarios,max_salud,max_guarderías,...,idx_social,idx_actividad física,idx_cultural,mean_time,median_time,max_time,idx_sum,pobtot,dens_pobha,city
0,8948a202b0fffff,9,"POLYGON ((-100.28445 25.64300, -100.28315 25.6...",7.629481,6.259959,3.48433,7.629481,28.147561,15.893122,7.649913,...,0.968852,0.978012,0.012703,13.391402,7.081654,59.718011,11.999091,563.0,45.916411,Monterrey


## Comparing changes

In [8]:
# Put the three analyses side by side, one row per hexagon.
# Column naming after both merges:
#   <column>_x = con bibliotecas (with libraries)
#   <column>_y = sin bibliotecas (without libraries)
#   <column>   = original analysis (its names no longer collide, so they get no suffix)
merged = pd.merge(hex_idx_conbibliotecas,
                  hex_idx_sinbibliotecas,
                  on='hex_id',
                  suffixes=('_x', '_y'))
merged_2 = pd.merge(merged, prox_original, on='hex_id')

# Show
print(merged_2.shape)
merged_2.head(1)

(7423, 124)


Unnamed: 0,hex_id,res_x,geometry_x,max_escuelas_x,max_preescolar_x,max_primaria_x,max_secundaria_x,max_servicios comunitarios_x,max_salud_x,max_guarderías_x,...,idx_cultural,mean_time,median_time,max_time,idx_sum,pobtot,dens_pobha,city,geometry,res
0,8948a202b0fffff,9,"POLYGON ((-100.28445 25.64300, -100.28315 25.6...",7.629481,6.259959,3.48433,7.629481,28.147561,15.893122,7.649913,...,0.96552,8.014404,6.511771,16.045945,13.358216,563.0,45.916411,Monterrey,"POLYGON ((-100.28445 25.64300, -100.28315 25.6...",9


In [9]:
# Columns compared across the three analyses: per-amenity times, per-amenity
# indexes, the aggregate columns and the population columns.
comparing_cols = max_amenities_cols + idx_amenities_cols + ['mean_time', 'median_time', 'max_time', 'idx_sum','pobtot', 'dens_pobha']

# One record per compared column holding its mean in each analysis.
# In merged_2: <col> = original, <col>_x = con bibliotecas, <col>_y = sin bibliotecas.
# The table is built once from the records instead of being grown cell by cell.
summary_records = [
    {'field': col,
     'original': merged_2[col].mean(),
     'conbiblio': merged_2[col + '_x'].mean(),
     'sinbiblio': merged_2[col + '_y'].mean()}
    for col in comparing_cols
]
summary = pd.DataFrame(summary_records,
                       columns=['field', 'original', 'conbiblio', 'sinbiblio'])

# Difference of each variation with respect to the original analysis.
summary['diff_con'] = summary['conbiblio'] - summary['original']
summary['diff_sin'] = summary['sinbiblio'] - summary['original']

# Show
print(summary.shape)
summary

(34, 6)


Unnamed: 0,field,original,conbiblio,sinbiblio,diff_con,diff_sin
0,max_preescolar,15.899131,15.899131,15.899131,0.0,0.0
1,max_primaria,17.106348,17.106348,17.106348,0.0,0.0
2,max_secundaria,29.789805,29.789805,29.789805,0.0,0.0
3,max_salud,20.185867,20.185867,20.185867,0.0,0.0
4,max_guarderías,31.461326,31.461326,31.461326,0.0,0.0
5,max_asistencia social,34.546681,49.640818,49.640818,15.094137,15.094137
6,max_alimentos,8.008375,8.008375,8.008375,0.0,0.0
7,max_personal,11.363989,11.363989,11.363989,0.0,0.0
8,max_farmacias,19.572803,19.572803,19.572803,0.0,0.0
9,max_hogar,13.331633,13.331633,13.331633,0.0,0.0


In [10]:
# Suffix carried by each analysis' columns in merged_2.
analysis_suffixes = {'original': '', 'conbiblio': '_x', 'sinbiblio': '_y'}

# For every hexagon, the name of the eje column with the largest time in each
# analysis. The eje columns come from column_max_ejes (same list the summary
# below iterates over) instead of being hard-coded once per analysis.
farthest = pd.DataFrame({'hex_id': merged_2['hex_id']})
for label, suffix in analysis_suffixes.items():
    eje_cols = [eje + suffix for eje in column_max_ejes]
    farthest[label + '_farthest'] = merged_2[eje_cols].idxmax(axis=1)

# Number of hexagons in which each eje is the farthest one, per analysis.
farthest_records = []
for eje in column_max_ejes:
    record = {'field': eje}
    for label, suffix in analysis_suffixes.items():
        is_farthest = farthest[label + '_farthest'] == eje + suffix
        # Stored as float so the table keeps the dtype shown in the recorded output.
        record[label] = float(is_farthest.sum())
    farthest_records.append(record)

farthest_summary = pd.DataFrame(farthest_records,
                                columns=['field', 'original', 'conbiblio', 'sinbiblio'])

# Change in the number of hexagons with respect to the original analysis.
farthest_summary['diff_con'] = farthest_summary['conbiblio'] - farthest_summary['original']
farthest_summary['diff_sin'] = farthest_summary['sinbiblio'] - farthest_summary['original']

farthest_summary

Unnamed: 0,field,original,conbiblio,sinbiblio,diff_con,diff_sin
0,max_escuelas,870.0,671.0,540.0,-199.0,-330.0
1,max_servicios comunitarios,1471.0,4192.0,3212.0,2721.0,1741.0
2,max_comercio,83.0,106.0,75.0,23.0,-8.0
3,max_entretenimiento,4999.0,2454.0,3596.0,-2545.0,-1403.0
