## Import libraries

In [1]:
# Relative path prefix used by this notebook: it is appended to sys.path before
# importing `aup`, and it is prepended to the output CSV path when saving results.
# NOTE(review): presumably points at the accesibilidad_urbana repository root - confirm.
accesibilidad_urbana = '../../'

In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the folder referenced by `accesibilidad_urbana` importable so that the
# project-local `aup` package can be resolved.
module_path = os.path.abspath(os.path.join(accesibilidad_urbana))
if module_path not in sys.path:
    sys.path.append(module_path)
# Import unconditionally: when the path is already on sys.path (PYTHONPATH,
# a .pth file, or a namespace that was cleared without restarting the kernel)
# an import nested under the `if` would be skipped and `aup` left undefined.
import aup

## Load, count and summarize data

In [3]:
# Summary dataframe: one row per city, one column per population/density measure.
# Rows are written as soon as they are computed so that partial results survive
# if the database connection drops midway through the run.
summary_df = pd.DataFrame()

# Parameters
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2020'
year = '2020'
year_sfx = year[:2] # Census table names embed the first two characters of `year`
res_lst = [8,9,10] # Hexagon resolutions to summarize


def _sql_str(value):
    """Return `value` as a single-quoted SQL string literal (embedded quotes doubled)."""
    return "'" + str(value).replace("'", "''") + "'"


def _sql_in_list(values):
    """Return `values` as a parenthesised SQL list of string literals.

    Unlike `str(tuple(values))`, a single value yields `('x')` rather than the
    invalid `('x',)`, so no element needs to be duplicated for one-item lists.
    """
    return "(" + ", ".join(_sql_str(v) for v in values) + ")"


# Load all cities
query = f"SELECT * FROM {metro_schema}.{metro_table}"
metro_gdf = aup.gdf_from_query(query, geometry_col='geometry')
metro_gdf = metro_gdf.set_crs("EPSG:4326")
print("Loaded all cities.")

# Iterate over each city
city_list = list(metro_gdf.city.unique()) # Full run
#city_list = ['Aguascalientes'] # Test
for i, city in enumerate(city_list):

    summary_df.loc[i,'city'] = city # Register data

    # 1.1 --------------- EXTRACT CITY DATA FROM METRO_GDF
    # Extract current city
    city_gdf = metro_gdf.loc[metro_gdf.city == city]
    # Unique cvegeo_mun ('CVE_ENT'+'CVE_MUN') codes of current city, as a SQL IN-list
    cvegeo_mun_lst = list((city_gdf['CVE_ENT']+city_gdf['CVE_MUN']).unique())
    cvegeo_mun_tpl = _sql_in_list(cvegeo_mun_lst)

    # 1.2 --------------- LOAD AND COUNT POP DATA (AGEBs and Blocks)
    # Not wrapped in try/except: a failure here stops the run, as before.
    cols = 'pobtot'
    query = f"SELECT {cols} FROM censo.censo_inegi_{year_sfx}_ageb WHERE \"cvegeo_mun\" IN {cvegeo_mun_tpl}"
    summary_df.loc[i,'agebs_pop'] = aup.df_from_query(query)['pobtot'].sum() # Register data
    query = f"SELECT {cols} FROM censo.censo_inegi_{year_sfx}_mza WHERE \"cvegeo_mun\" IN {cvegeo_mun_tpl}"
    summary_df.loc[i,'blocks_pop'] = aup.df_from_query(query)['pobtot'].sum() # Register data
    print(f"{city} - Loaded AGEBs and blocks.")

    # 1.3 --------------- LOAD AND COUNT POP DATA (Nodes)
    # Best effort: a failure is reported and the city keeps a missing value.
    # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working,
    # and the error is printed so real failures are not mistaken for missing data.
    try:
        cols = 'pobtot'
        query = f"SELECT {cols} FROM censo.pobcenso_inegi_{year_sfx}_mzaageb_node WHERE \"city\" LIKE {_sql_str(city)}"
        summary_df.loc[i,'nodes_pop'] = aup.df_from_query(query)['pobtot'].sum() # Register data
        print(f"{city} - Loaded nodes.")
    except Exception as err:
        print(f"{city} - Has no nodes data. ({type(err).__name__}: {err})")

    # 1.4 --------------- LOAD AND COUNT POP DATA (Hexs)
    # Best effort as above; the first failing resolution skips the remaining ones.
    try:
        cols = 'pobtot,dens_pob_ha'
        for res in res_lst:
            query = f"SELECT {cols} FROM censo.pobcenso_inegi_{year_sfx}_mzaageb_hex WHERE \"city\" = {_sql_str(city)} AND \"res\" = '{res}'"
            pop_hexs_df = aup.df_from_query(query)
            # Count population
            summary_df.loc[i,f'hex_{res}_pop'] = pop_hexs_df['pobtot'].sum() # Register data
            # Mean density
            summary_df.loc[i,f'hex_{res}_dens'] = pop_hexs_df['dens_pob_ha'].mean() # Register data
            print(f"{city} - Loaded hexs res {res}.")
            del pop_hexs_df
    except Exception as err:
        print(f"{city} - Has no hexs data. ({type(err).__name__}: {err})")

summary_df

Loaded all cities.
Aguascalientes - Loaded AGEBs and blocks.
Aguascalientes - Loaded nodes.
Aguascalientes - Loaded hexs res 8.
Aguascalientes - Loaded hexs res 9.
Aguascalientes - Loaded hexs res 10.
Ensenada - Loaded AGEBs and blocks.
Ensenada - Loaded nodes.
Ensenada - Loaded hexs res 8.
Ensenada - Loaded hexs res 9.
Ensenada - Loaded hexs res 10.
Mexicali - Loaded AGEBs and blocks.
Mexicali - Loaded nodes.
Mexicali - Loaded hexs res 8.
Mexicali - Loaded hexs res 9.
Mexicali - Loaded hexs res 10.
Tijuana - Loaded AGEBs and blocks.
Tijuana - Loaded nodes.
Tijuana - Loaded hexs res 8.
Tijuana - Loaded hexs res 9.
Tijuana - Loaded hexs res 10.
La Paz - Loaded AGEBs and blocks.
La Paz - Loaded nodes.
La Paz - Loaded hexs res 8.
La Paz - Loaded hexs res 9.
La Paz - Loaded hexs res 10.
Los Cabos - Loaded AGEBs and blocks.
Los Cabos - Loaded nodes.
Los Cabos - Loaded hexs res 8.
Los Cabos - Loaded hexs res 9.
Los Cabos - Loaded hexs res 10.
Campeche - Loaded AGEBs and blocks.
Campeche - Lo

Exception during reset or similar
Traceback (most recent call last):
  File "/opt/conda/envs/gds/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 145, in __init__
    self._dbapi_connection = engine.raw_connection()
  File "/opt/conda/envs/gds/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 3292, in raw_connection
    return self.pool.connect()
  File "/opt/conda/envs/gds/lib/python3.9/site-packages/sqlalchemy/pool/base.py", line 452, in connect
    return _ConnectionFairy._checkout(self)
  File "/opt/conda/envs/gds/lib/python3.9/site-packages/sqlalchemy/pool/base.py", line 1269, in _checkout
    fairy = _ConnectionRecord.checkout(pool)
  File "/opt/conda/envs/gds/lib/python3.9/site-packages/sqlalchemy/pool/base.py", line 716, in checkout
    rec = pool._do_get()
  File "/opt/conda/envs/gds/lib/python3.9/site-packages/sqlalchemy/pool/impl.py", line 170, in _do_get
    self._dec_overflow()
  File "/opt/conda/envs/gds/lib/python3.9/site-packages/sqlalchemy/util/l

OperationalError: (psycopg2.OperationalError) connection to server at "hippocampus.cswst4rid7eb.us-east-2.rds.amazonaws.com" (18.190.20.157), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?
connection to server at "hippocampus.cswst4rid7eb.us-east-2.rds.amazonaws.com" (18.190.20.157), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [9]:
# Difference between the block-level population total and the total obtained at
# every other aggregation level (positive -> the blocks total is the larger one).
# Keys are the new column names, values the columns subtracted from 'blocks_pop'.
comparison_cols = {
    'blocks_agebs': 'agebs_pop',
    'blocks_nodes': 'nodes_pop',
    'blocks_hex8': 'hex_8_pop',
    'blocks_hex9': 'hex_9_pop',
    'blocks_hex10': 'hex_10_pop',
}
blocks_total = summary_df['blocks_pop']
for diff_col, other_col in comparison_cols.items():
    summary_df[diff_col] = blocks_total - summary_df[other_col]

summary_df.head(40)

Unnamed: 0,city,agebs_pop,blocks_pop,nodes_pop,hex_8_pop,hex_8_dens,hex_9_pop,hex_9_dens,hex_10_pop,hex_10_dens,blocks_nodes,blocks_hex8,blocks_hex9,blocks_hex10,blocks_agebs
0,Aguascalientes,1042295.0,1043905.0,1043905.0,1043905.0,38.002939,1043905.0,51.645011,1043905.0,71.190394,0.02348462,0.01904223,0.02606417,0.02403932,1610.0
1,Ensenada,397099.0,396709.0,396709.0,396709.0,20.444583,396709.0,28.67294,396709.0,42.307928,-0.01736394,-0.01632396,-0.01747433,-0.0168884,-390.0
2,Mexicali,950970.0,959257.0,959256.7,959256.7,22.453421,959256.7,31.651314,959256.7,46.078042,0.2740791,0.2705197,0.2694871,0.2741234,8287.0
3,Tijuana,2000259.0,2002469.0,2002469.0,2002469.0,41.885156,2002469.0,50.672541,2002469.0,64.161687,-0.02957222,-0.03127077,-0.02867546,-0.02929308,2210.0
4,La Paz,270846.0,270681.0,270681.0,270681.0,13.663785,270681.0,20.864971,270681.0,34.546294,6.533141e-05,0.004495538,0.0002852159,6.78926e-05,-165.0
5,Los Cabos,338979.0,338675.0,338675.0,338675.0,16.880406,338675.0,24.377736,338675.0,39.935697,-0.002491545,-0.00658574,-0.0009013808,-0.002633244,-304.0
6,Campeche,267558.0,267387.0,267387.0,267387.0,29.42685,267387.0,41.549685,267387.0,59.09896,0.01649003,0.01771487,0.01786993,0.01658175,-171.0
7,Laguna,1218759.0,1218686.0,1218686.0,1218686.0,29.634965,1218686.0,39.417845,1218686.0,55.162776,0.0738477,0.07250686,0.07437077,0.07303434,-73.0
8,Monclova,364881.0,367434.0,367434.0,367434.0,19.238868,367434.0,26.573681,367434.0,38.978046,-0.005825941,-0.006369926,-0.006783308,-0.005574614,2553.0
9,Piedras Negras,206172.0,206163.0,206163.0,206163.0,17.494641,206163.0,24.315004,206163.0,34.612875,-0.0004939427,-0.00168435,-0.000404281,-0.000165691,-9.0


In [13]:
# Display the last 26 rows of the summary table.
summary_df.tail(26)

Unnamed: 0,city,agebs_pop,blocks_pop,nodes_pop,hex_8_pop,hex_8_dens,hex_9_pop,hex_9_dens,hex_10_pop,hex_10_dens,blocks_nodes,blocks_hex8,blocks_hex9,blocks_hex10,blocks_agebs
40,Puebla,2654156.0,2647940.0,2647940.0,2647940.0,31.899048,2647940.0,39.583086,2647940.0,54.441507,0.04079699,0.03348014,0.03904257,0.04077408,-6216.0
41,San Martin,287426.0,285397.0,285397.0,285397.0,14.858234,285397.0,22.343119,285397.0,38.954284,-0.01413995,-0.01330954,-0.01280071,-0.01425192,-2029.0
42,Tehuacan,338779.0,337788.0,337788.0,337788.0,25.126734,337788.0,37.541128,337788.0,56.776551,-0.02002805,-0.0229004,-0.01916919,-0.0200574,-991.0
43,Queretaro,1381392.0,1379350.0,0.0,0.0,,0.0,,0.0,,1379350.0,1379350.0,1379350.0,1379350.0,-2042.0
44,Cancun,928428.0,928339.0,928339.1,928339.1,40.099181,928339.1,53.059133,928339.1,73.346812,-0.1120071,-0.1180139,-0.1128181,-0.1116748,-89.0
45,Chetumal,188996.0,188777.0,188777.0,188777.0,20.374186,188777.0,30.267272,188777.0,47.666899,0.004362037,0.001793865,0.00470425,0.004442726,-219.0
46,Playa,327820.0,327652.0,327652.0,327652.0,38.590589,327652.0,51.53882,327652.0,76.802945,0.02670948,0.02864301,0.02495929,0.02605055,-168.0
47,SLP,1194744.0,1194500.0,1194500.0,1194500.0,39.201861,1194500.0,50.579772,1194500.0,68.069126,-0.01601504,-0.01691211,-0.01694176,-0.01745588,-244.0
48,Culiacan,886283.0,886053.0,886053.1,886053.1,33.187409,886053.1,44.954043,886053.1,60.650179,-0.06967195,-0.07248399,-0.06821082,-0.06960061,-230.0
49,Los Mochis,378508.0,380961.0,0.0,0.0,,0.0,,0.0,,380961.0,380961.0,380961.0,380961.0,2453.0


In [21]:
# Destination CSV file for the per-city population comparison table.
local_save_dir = accesibilidad_urbana + 'data/processed/pop_data/pop_data_comparison.csv'
# Set to False to run the cell without writing to disk.
save = True
if save:
    # DataFrame.to_csv does not create missing folders; make sure the target
    # directory exists so the export does not fail with an OSError.
    os.makedirs(os.path.dirname(local_save_dir), exist_ok=True)
    summary_df.to_csv(local_save_dir)