## Import libraries

In [4]:
accesibilidad_urbana = '../../'

In [5]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join(accesibilidad_urbana))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup

## Load, count and sumarize data

In [10]:
# Summary dataframe
summary_df = pd.DataFrame()

# Parameters
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2020'
year = '2020'

# Load all cities
query = f"SELECT * FROM {metro_schema}.{metro_table}"
metro_gdf = aup.gdf_from_query(query, geometry_col='geometry')
metro_gdf = metro_gdf.set_crs("EPSG:4326")
print(f"Loaded all cities.")

# Iterate over each city
i = 0
city_list = list(metro_gdf.city.unique()) # Full run
#city_list = ['Aguascalientes'] # Test
for city in city_list:

    summary_df.loc[i,'city'] = city # Register data
    
    # 1.1 --------------- EXTRACT CITY DATA FROM METRO_GDF
    # Extract current city
    city_gdf = metro_gdf.loc[metro_gdf.city == city]
    city_gdf = city_gdf.set_crs("EPSG:4326")
    # Create a list with all unique cvegeo_mun ('CVE_ENT'+'CVE_MUN') of current city
    city_gdf['cvegeo_mun'] = city_gdf['CVE_ENT']+city_gdf['CVE_MUN']
    cvegeo_mun_lst = list(city_gdf.cvegeo_mun.unique())
    cvegeo_mun_tpl = str(tuple(cvegeo_mun_lst))
    # To avoid error that happens when there's only one MUN in State: 
    # SQL e.g.: <<< SELECT * FROM censo.censo_inegi_{year[:2]}_mza WHERE ("entidad" = '02') AND "mun" IN ('001',) >>>
    # Duplicate mun inside tupple if there's only one MUN.
    if len(cvegeo_mun_lst) >= 2:
        cvegeo_mun_tpl = str(tuple(cvegeo_mun_lst))
    else:
        cvegeo_mun_lst.append(cvegeo_mun_lst[0])
        cvegeo_mun_tpl = str(tuple(cvegeo_mun_lst))

    # 1.2 --------------- LOAD AND COUNT POP DATA (AGEBs and Blocks)
    cols = 'pobtot'
    # Load AGEBs and blocks
    query = f"SELECT {cols} FROM censo.censo_inegi_{year[:2]}_ageb WHERE \"cvegeo_mun\" IN {cvegeo_mun_tpl}"
    pop_ageb_gdf = aup.df_from_query(query)
    query = f"SELECT {cols} FROM censo.censo_inegi_{year[:2]}_ageb WHERE \"cvegeo_mun\" IN {cvegeo_mun_tpl}"
    pop_mza_gdf = aup.df_from_query(query)
    # Count population
    summary_df.loc[i,'agebs_pop'] = pop_ageb_gdf['pobtot'].sum() # Register data
    summary_df.loc[i,'blocks_pop'] = pop_mza_gdf['pobtot'].sum() # Register data
    print(f"{city} - Loaded AGEBs and blocks.")
    del pop_ageb_gdf
    del pop_mza_gdf

    # 1.3 --------------- LOAD AND COUNT POP DATA (Nodes)
    cols = 'pobtot'
    # Load nodes
    query = f"SELECT {cols} FROM censo.pobcenso_inegi_{year[:2]}_mzaageb_node WHERE \"city\" LIKE \'{city}\'"
    pop_nodes_gdf = aup.df_from_query(query)
    # Count population
    summary_df.loc[i,'nodes_pop'] = pop_nodes_gdf['pobtot'].sum() # Register data
    print(f"{city} - Loaded nodes.")
    del pop_nodes_gdf

    # 1.4 --------------- LOAD AND COUNT POP DATA (Hexs)
    cols = 'pobtot,dens_pob_ha'
    res_lst = [8,9,10]
    for res in res_lst:
        # Load nodes
        query = f"SELECT {cols} FROM censo.pobcenso_inegi_{year[:2]}_mzaageb_hex WHERE \"city\" = '{city}\' AND \"res\" = '{res}\'"
        pop_hexs_gdf = aup.df_from_query(query)
        # Count population
        summary_df.loc[i,f'hex_{res}_pop'] = pop_hexs_gdf['pobtot'].sum() # Register data
        # Mean density
        summary_df.loc[i,f'hex_{res}_dens'] = pop_hexs_gdf['dens_pob_ha'].mean() # Register data
        print(f"{city} - Loaded hexs res {res}.")
        del pop_hexs_gdf

    i+=1

summary_df

Loaded all cities.
Aguascalientes - Loaded AGEBs and blocks.
Aguascalientes - Loaded nodes.
Aguascalientes - Loaded hexs res 8.
Aguascalientes - Loaded hexs res 9.
Aguascalientes - Loaded hexs res 10.
Ensenada - Loaded AGEBs and blocks.
Ensenada - Loaded nodes.
Ensenada - Loaded hexs res 8.
Ensenada - Loaded hexs res 9.
Ensenada - Loaded hexs res 10.
Mexicali - Loaded AGEBs and blocks.
Mexicali - Loaded nodes.
Mexicali - Loaded hexs res 8.
Mexicali - Loaded hexs res 9.
Mexicali - Loaded hexs res 10.
Tijuana - Loaded AGEBs and blocks.
Tijuana - Loaded nodes.
Tijuana - Loaded hexs res 8.
Tijuana - Loaded hexs res 9.
Tijuana - Loaded hexs res 10.
La Paz - Loaded AGEBs and blocks.
La Paz - Loaded nodes.
La Paz - Loaded hexs res 8.
La Paz - Loaded hexs res 9.
La Paz - Loaded hexs res 10.
Los Cabos - Loaded AGEBs and blocks.
Los Cabos - Loaded nodes.
Los Cabos - Loaded hexs res 8.
Los Cabos - Loaded hexs res 9.
Los Cabos - Loaded hexs res 10.
Campeche - Loaded AGEBs and blocks.
Campeche - Lo

Unnamed: 0,city,agebs_pop,blocks_pop,nodes_pop,hex_8_pop,hex_8_dens,hex_9_pop,hex_9_dens,hex_10_pop,hex_10_dens
0,Aguascalientes,1042295.0,1042295.0,1.042295e+06,1.042295e+06,37.944300,1.042295e+06,51.345896,1.042295e+06,70.825107
1,Ensenada,397099.0,397099.0,3.970990e+05,3.970990e+05,20.464682,3.970990e+05,28.499899,3.970990e+05,41.943748
2,Mexicali,950970.0,950970.0,9.509698e+05,9.509698e+05,23.144610,9.509698e+05,32.053751,9.509698e+05,46.364442
3,Tijuana,2000259.0,2000259.0,2.000259e+06,2.000259e+06,42.139375,2.000259e+06,50.445648,2.000259e+06,63.831188
4,La Paz,270846.0,270846.0,2.708460e+05,2.708460e+05,13.434341,2.708460e+05,20.462905,2.708460e+05,33.947316
...,...,...,...,...,...,...,...,...,...,...
66,Poza Rica,261251.0,261251.0,2.612510e+05,2.612510e+05,25.443982,2.612510e+05,37.771155,2.612510e+05,53.750747
67,Veracruz,800379.0,800379.0,8.003790e+05,8.003790e+05,40.731140,8.003790e+05,53.910765,8.003790e+05,72.251213
68,Xalapa,671742.0,671742.0,6.717420e+05,6.717420e+05,33.356278,6.717420e+05,49.082198,6.717420e+05,72.397589
69,Merida,1246526.0,1246526.0,1.246526e+06,1.246526e+06,23.309865,1.246526e+06,29.974190,1.246526e+06,43.389719


In [17]:
summary_df['agebs_blocks'] = summary_df['agebs_pop']-summary_df['blocks_pop']
print(f"AGEBs and blocks have always the same info: {summary_df.agebs_blocks.unique()}.")

summary_df['agebs_nodes'] = summary_df['agebs_pop']-summary_df['nodes_pop']
summary_df['agebs_hex8'] = summary_df['agebs_pop']-summary_df['hex_8_pop']
summary_df['agebs_hex9'] = summary_df['agebs_pop']-summary_df['hex_9_pop']
summary_df['agebs_hex10'] = summary_df['agebs_pop']-summary_df['hex_10_pop']

summary_df.head(40)

AGEBs and blocks have always the same info: [0.].


Unnamed: 0,city,agebs_pop,blocks_pop,nodes_pop,hex_8_pop,hex_8_dens,hex_9_pop,hex_9_dens,hex_10_pop,hex_10_dens,agebs_blocks,agebs_nodes,agebs_hex8,agebs_hex9,agebs_hex10
0,Aguascalientes,1042295.0,1042295.0,1042295.0,1042295.0,37.9443,1042295.0,51.345896,1042295.0,70.825107,0.0,0.011923,0.012061,0.01644,0.01207
1,Ensenada,397099.0,397099.0,397099.0,397099.0,20.464682,397099.0,28.499899,397099.0,41.943748,0.0,-0.003269,0.000849,-0.004001,-0.003154
2,Mexicali,950970.0,950970.0,950969.8,950969.8,23.14461,950969.8,32.053751,950969.8,46.364442,0.0,0.189313,0.188983,0.186523,0.189493
3,Tijuana,2000259.0,2000259.0,2000259.0,2000259.0,42.139375,2000259.0,50.445648,2000259.0,63.831188,0.0,-0.023317,-0.017928,-0.024152,-0.022083
4,La Paz,270846.0,270846.0,270846.0,270846.0,13.434341,270846.0,20.462905,270846.0,33.947316,0.0,0.046935,0.047119,0.047758,0.046837
5,Los Cabos,338979.0,338979.0,338979.0,338979.0,16.605507,338979.0,23.010688,338979.0,37.319894,0.0,-0.008736,-0.008982,-0.009419,-0.008493
6,Campeche,267558.0,267558.0,267558.0,267558.0,29.198273,267558.0,41.084687,267558.0,58.749098,0.0,0.022196,0.022025,0.021428,0.022333
7,Laguna,1218759.0,1218759.0,1218759.0,1218759.0,29.449559,1218759.0,39.183587,1218759.0,54.795626,0.0,-0.010152,-0.012254,-0.008842,-0.010262
8,Monclova,364881.0,364881.0,364881.0,364881.0,19.368135,364881.0,26.531213,364881.0,38.925758,0.0,-0.014732,-0.013234,-0.013692,-0.014248
9,Piedras Negras,206172.0,206172.0,206172.0,206172.0,17.495409,206172.0,24.245802,206172.0,34.54314,0.0,0.000418,0.000707,0.000601,0.00022


In [20]:
summary_df.tail(31)

Unnamed: 0,city,agebs_pop,blocks_pop,nodes_pop,hex_8_pop,hex_8_dens,hex_9_pop,hex_9_dens,hex_10_pop,hex_10_dens,agebs_blocks,agebs_nodes,agebs_hex8,agebs_hex9,agebs_hex10
40,Puebla,2654156.0,2654156.0,2654156.0,2654156.0,31.909627,2654156.0,39.569984,2654156.0,54.403183,0.0,0.030782,0.030111,0.029944,0.030518
41,San Martin,287426.0,287426.0,287426.0,287426.0,14.963871,287426.0,22.355155,287426.0,39.003383,0.0,0.006843,0.007771,0.007142,0.006541
42,Tehuacan,338779.0,338779.0,338779.0,338779.0,25.045838,338779.0,37.503018,338779.0,56.797591,0.0,-0.007531,-0.005468,-0.006179,-0.007639
43,Queretaro,1381392.0,1381392.0,1381333.0,1381333.0,30.15167,1381333.0,41.51654,1381333.0,57.822397,0.0,58.927876,58.92596,58.926493,58.928125
44,Cancun,928428.0,928428.0,928428.0,928428.0,39.727208,928428.0,52.906665,928428.0,73.078726,0.0,-0.022365,-0.022211,-0.02029,-0.022424
45,Chetumal,188996.0,188996.0,188996.0,188996.0,20.076579,188996.0,30.199812,188996.0,47.5949,0.0,0.018534,0.019674,0.019308,0.018849
46,Playa,327820.0,327820.0,327820.0,327820.0,38.610445,327820.0,51.396249,327820.0,76.308065,0.0,0.048194,0.044353,0.047032,0.049016
47,SLP,1194744.0,1194744.0,1194744.0,1194744.0,39.09973,1194744.0,50.406901,1194744.0,67.81901,0.0,0.043703,0.046917,0.039971,0.043826
48,Culiacan,886283.0,886283.0,886283.0,886283.0,33.087182,886283.0,44.681298,886283.0,60.105497,0.0,-0.019327,-0.018384,-0.019625,-0.019653
49,Los Mochis,378508.0,378508.0,378439.4,378439.4,19.487359,378439.4,32.450963,378439.4,47.754834,0.0,68.629119,68.627432,68.629944,68.629047


In [21]:
local_save_dir = accesibilidad_urbana + 'data/processed/pop_data/pop_data_comparison.csv'
save = True
if save:
    summary_df.to_csv(local_save_dir)