# Blocks 800

This notebook runs part of Script 22, up to and including the function calculate_censo_nan_values(), in order to print the logs that identify which AGEBs contain blocks_800.
Then one random AGEB containing blocks_800 is loaded to __confirm that blocks_800 are needed to complete the population data of the blocks when using data from the 2020 AGEBs__.

In [1]:
accesibilidad_urbana = '../../../'

import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

# Para obtener el punto central de una línea en los tests,
# Y para convertir MultiLineStrings a LineStrings.
from shapely.geometry import LineString, MultiLineString

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Register the directory referenced by `accesibilidad_urbana` on sys.path so the
# project-local `aup` package can be imported from this notebook.
module_path = os.path.abspath(os.path.join(accesibilidad_urbana))
if module_path not in sys.path:
    sys.path.append(module_path)
# Import unconditionally: if the path was already registered (e.g. through
# PYTHONPATH), an import nested under the `if` above would be skipped and every
# later cell using `aup` would fail with a NameError.
import aup

## Running Script 22 [up to calculate_censo_nan_values()] [up to where AGEBs and blocks 800 are identified]

In [2]:
def main(city,save=False,local_save=True):
    """Load the census AGEBs and blocks of one city and run the NaN-value calculation.

    This is the part of Script 22 that goes up to the call to
    aup.calculate_censo_nan_values_v1(), which is the call this notebook runs
    to obtain the blocks_800 logs.

    The function reads the notebook-level globals `metro_gdf` (municipalities,
    with columns `city`, `CVE_ENT` and `CVE_MUN`) and `year`, and uses the
    project module `aup` for logging and database queries.

    Parameters
    ----------
    city : str
        Value compared against `metro_gdf.city` to select the municipalities.
    save : bool, optional
        Not used in this section; kept so existing calls keep working.
    local_save : bool, optional
        Not used in this section; kept so existing calls keep working.

    Returns
    -------
    The object returned by aup.calculate_censo_nan_values_v1().

    Raises
    ------
    ValueError
        If `metro_gdf` has no rows for `city`.
    """

    ##########################################################################################
    # STEP 1: LOAD DATA
    aup.log("--"*30)
    aup.log("--- LOADING CITY POP DATA.")

    # 1.1 --------------- SELECT THE MUNICIPALITIES OF THE CITY
    city_gdf = metro_gdf.loc[metro_gdf.city == city]
    if city_gdf.empty:
        # Without municipalities the SQL filter below cannot be built.
        raise ValueError(f"City '{city}' not found in metro_gdf.")
    city_gdf = city_gdf.set_crs("EPSG:4326")

    # 1.2 --------------- LOAD POP DATA (AGEBs and Blocks)
    aup.log("--- Loading blocks and AGEBs for area of interest.")

    # Create a list with all unique cvegeo_mun ('CVE_ENT'+'CVE_MUN') of current city
    city_gdf['cvegeo_mun'] = city_gdf['CVE_ENT']+city_gdf['CVE_MUN']
    cvegeo_mun_lst = list(city_gdf.cvegeo_mun.unique())
    # str(tuple) of a single element renders as "('01001',)", and the trailing
    # comma is invalid SQL. Duplicating the only code yields "('01001', '01001')".
    if len(cvegeo_mun_lst) == 1:
        cvegeo_mun_lst = cvegeo_mun_lst * 2
    cvegeo_mun_tpl = str(tuple(cvegeo_mun_lst))

    # Load AGEBs and blocks. Blocks live in the *_mza table; reading the *_ageb
    # table twice would hand AGEBs to the calculation in place of blocks.
    census_prefix = f"censo.censo_inegi_{year[:2]}"
    query = f"SELECT * FROM {census_prefix}_ageb WHERE \"cvegeo_mun\" IN {cvegeo_mun_tpl}"
    pop_ageb_gdf = aup.gdf_from_query(query, geometry_col='geometry')
    query = f"SELECT * FROM {census_prefix}_mza WHERE \"cvegeo_mun\" IN {cvegeo_mun_tpl}"
    pop_mza_gdf = aup.gdf_from_query(query, geometry_col='geometry')

    # Set CRS
    pop_ageb_gdf = pop_ageb_gdf.set_crs("EPSG:4326")
    pop_mza_gdf = pop_mza_gdf.set_crs("EPSG:4326")
    aup.log(f"--- Loaded AGEBs with total population {pop_ageb_gdf['pobtot'].sum()} for area of interest.")
    aup.log(f"--- Loaded blocks with total population {pop_mza_gdf['pobtot'].sum()} for area of interest.")

    ##########################################################################################
    # STEP 2: CALCULATE NaN VALUES for pop fields (most of them, check function) of gdf containing blocks.
    aup.log("--"*30)
    aup.log(f"--- CALCULATING NAN VALUES FOR POP FIELDS IN {city.upper()}.")

    # 2.1 --------------- CALCULATE_CENSO_NAN_VALUES FUNCTION
    pop_mza_gdf_calc = aup.calculate_censo_nan_values_v1(pop_ageb_gdf,pop_mza_gdf,year=year,extended_logs=False)

    # Hand the result back so it can be inspected; it was previously discarded.
    return pop_mza_gdf_calc

In [3]:
# ------------------------------ BASE DATA REQUIRED ------------------------------
# Cities
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2020'
# Year of analysis
year = '2020' # '2010' or '2020'. ('2010' still WIP, not tested)
# List of skip cities (If failed / want to skip city)
skip_city_list = ['CDMX', 'ZMVM']
# Hexgrid res of output
res_list = [8,9,10] #Only 8,9,10 and 11 available, run 8 and 9 only for prox. analysis v2.

# Save output to database?
save = False
save_schema = 'censo'
nodes_save_table = f'pobcenso_inegi_{year[:2]}_mzaageb_node'
save_table = f'pobcenso_inegi_{year[:2]}_mzaageb_hex'

# Save outputs to local? (Make sure directory exists)
local_save = False
local_save_dir = "../data/processed/pop_data/"

# Test - (If testing, Script runs res 8 for one city ONLY and saves it ONLY locally, adding the word 'test' at the beginning of the outputs.)
test = False
test_city = 'Aguascalientes'

# Cities are numbered in processing order (counter `i` below); this cell only
# runs main() for cities numbered up to and including this value.
last_city_number = 35

# ------------------------------ SCRIPT ------------------------------
# If test,
if test:
    # Simplifies script parameters
    skip_city_list = []
    res_list = [8]
    save = False
    local_save = True
    # Only loads one city
    missing_cities_list = [test_city]
    i = 0
    k = len(missing_cities_list)
    city = test_city
    query = f"SELECT * FROM {metro_schema}.{metro_table} WHERE \"city\" LIKE \'{city}\'"
    metro_gdf = aup.gdf_from_query(query, geometry_col='geometry')
    metro_gdf = metro_gdf.set_crs("EPSG:4326")

    aup.log(f"Processing test for {missing_cities_list} at res {res_list}.")

# If not test, runs Mexico's cities
else:
    # Load cities (municipalities)
    query = f"SELECT * FROM {metro_schema}.{metro_table}"
    metro_gdf = aup.gdf_from_query(query, geometry_col='geometry')
    metro_gdf = metro_gdf.set_crs("EPSG:4326")
    # Create a city list
    city_list = list(metro_gdf.city.unique())
    k = len(city_list)
    aup.log(f'--- Loaded city list with {k} cities.')

    # Prevent cities being analyzed several times in case of a crash
    processed_city_list = []
    try:
        query = f"SELECT city FROM {save_schema}.{save_table}"
        cities_processed = aup.df_from_query(query)
        processed_city_list = list(cities_processed.city.unique())
    except Exception as e:
        # Best effort: if the lookup fails, treat every city as unprocessed,
        # but leave a trace instead of silently swallowing the error.
        aup.log(f'--- Could not read processed cities from {save_schema}.{save_table} ({e}). Assuming none processed.')

    # LOG CODE - Print progress of script so far
    missing_cities_list = [c for c in city_list if c not in processed_city_list]
    i = len(processed_city_list)
    aup.log(f'--- Already processed ({i}/{k}) cities.')
    aup.log(f'--- Missing procesing for cities: {missing_cities_list}')

# Main function run
for city in missing_cities_list:
    if city in skip_city_list:
        continue
    print("--"*30)
    i = i + 1
    if i > last_city_number:
        print(f"--- Skipping city {i}/{k}: {city}")
    else:
        print(f"--- Starting city {i}/{k}: {city}")
        main(city, save, local_save)

------------------------------------------------------------
--- Starting city 1/71: Aguascalientes
INSPECTING AGEBs.
FOUND 13 blocks_800 in current municipalities.
ADDED 12 blocks_800 for 12 AGEBs.
Printing AGEBs with blocks_800 list:
['0100100013721', '0100100014113', '010010237458A', '0100102374594', '0100102933929', '0100103674607', '0100103674611', '0100104793933', '0100500010497', '0100500010656', '0101100110193', '0101100110206']
REMOVED blocks_800 from 1 other AGEBs not available in pop_gdf_ageb.
Printing AGEBs from blocks_800 that won't be used.
['0100120501903']
------------------------------------------------------------
--- Starting city 2/71: Ensenada
INSPECTING AGEBs.
FOUND 32 blocks_800 in current municipalities.
ADDED 32 blocks_800 for 32 AGEBs.
Printing AGEBs with blocks_800 list:
['0200100010100', '0200100010149', '0200100010153', '0200100010168', '0200100010172', '0200100010187', '0200100010191', '0200100010204', '0200100011255', '0200100011414', '020010001158A', '02

In [4]:
# ------------------------------ BASE DATA REQUIRED ------------------------------
# Cities
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2020'
# Year of analysis
year = '2020' # '2010' or '2020'. ('2010' still WIP, not tested)
# List of skip cities (If failed / want to skip city)
skip_city_list = ['CDMX', 'ZMVM']
# Hexgrid res of output
res_list = [8,9,10] #Only 8,9,10 and 11 available, run 8 and 9 only for prox. analysis v2.

# Save output to database?
save = False
save_schema = 'censo'
nodes_save_table = f'pobcenso_inegi_{year[:2]}_mzaageb_node'
save_table = f'pobcenso_inegi_{year[:2]}_mzaageb_hex'

# Save outputs to local? (Make sure directory exists)
local_save = False
local_save_dir = "../data/processed/pop_data/"

# Test - (If testing, Script runs res 8 for one city ONLY and saves it ONLY locally, adding the word 'test' at the beginning of the outputs.)
test = False
test_city = 'Aguascalientes'

# Cities are numbered in processing order (counter `i` below); this cell skips
# every city numbered below this value and runs main() from it onwards.
# NOTE(review): the city with this exact number is run here as well as by any
# run whose upper limit is the same number - confirm the overlap is intended.
first_city_number = 35

# ------------------------------ SCRIPT ------------------------------
# If test,
if test:
    # Simplifies script parameters
    skip_city_list = []
    res_list = [8]
    save = False
    local_save = True
    # Only loads one city
    missing_cities_list = [test_city]
    i = 0
    k = len(missing_cities_list)
    city = test_city
    query = f"SELECT * FROM {metro_schema}.{metro_table} WHERE \"city\" LIKE \'{city}\'"
    metro_gdf = aup.gdf_from_query(query, geometry_col='geometry')
    metro_gdf = metro_gdf.set_crs("EPSG:4326")

    aup.log(f"Processing test for {missing_cities_list} at res {res_list}.")

# If not test, runs Mexico's cities
else:
    # Load cities (municipalities)
    query = f"SELECT * FROM {metro_schema}.{metro_table}"
    metro_gdf = aup.gdf_from_query(query, geometry_col='geometry')
    metro_gdf = metro_gdf.set_crs("EPSG:4326")
    # Create a city list
    city_list = list(metro_gdf.city.unique())
    k = len(city_list)
    aup.log(f'--- Loaded city list with {k} cities.')

    # Prevent cities being analyzed several times in case of a crash
    processed_city_list = []
    try:
        query = f"SELECT city FROM {save_schema}.{save_table}"
        cities_processed = aup.df_from_query(query)
        processed_city_list = list(cities_processed.city.unique())
    except Exception as e:
        # Best effort: if the lookup fails, treat every city as unprocessed,
        # but leave a trace instead of silently swallowing the error.
        aup.log(f'--- Could not read processed cities from {save_schema}.{save_table} ({e}). Assuming none processed.')

    # LOG CODE - Print progress of script so far
    missing_cities_list = [c for c in city_list if c not in processed_city_list]
    i = len(processed_city_list)
    aup.log(f'--- Already processed ({i}/{k}) cities.')
    aup.log(f'--- Missing procesing for cities: {missing_cities_list}')

# Main function run
for city in missing_cities_list:
    if city in skip_city_list:
        continue
    print("--"*40)
    i = i + 1
    if i < first_city_number:
        print(f"--- Skipping city {i}/{k}: {city}")
    else:
        print(f"--- Starting city {i}/{k}: {city}")
        main(city, save, local_save)

--------------------------------------------------------------------------------
--- Skipping city 1/71: Aguascalientes
--------------------------------------------------------------------------------
--- Skipping city 2/71: Ensenada
--------------------------------------------------------------------------------
--- Skipping city 3/71: Mexicali
--------------------------------------------------------------------------------
--- Skipping city 4/71: Tijuana
--------------------------------------------------------------------------------
--- Skipping city 5/71: La Paz
--------------------------------------------------------------------------------
--- Skipping city 6/71: Los Cabos
--------------------------------------------------------------------------------
--- Skipping city 7/71: Campeche
--------------------------------------------------------------------------------
--- Skipping city 8/71: Laguna
--------------------------------------------------------------------------------
--- S

## Random AGEB example

In [6]:
# Example AGEB used in the checks below.
# AGEB '0100100013721' was listed for Aguascalientes among the AGEBs whose
# blocks_800 were added (its AGEB has geometry).
ageb_test = "0100100013721"

In [12]:
# Census blocks whose cvegeo_ageb matches the example AGEB
blocks_schema, blocks_table = 'censo', 'censo_inegi_20_mza'
query = f"SELECT * FROM {blocks_schema}.{blocks_table} WHERE \"cvegeo_ageb\" LIKE \'{ageb_test}\'"
# Query the database and tag the result with its CRS in a single chain
blocks_gdf = aup.gdf_from_query(query, geometry_col='geometry').set_crs("EPSG:4326")
# Print the (rows, columns) of the result, then display its first two rows
print(blocks_gdf.shape)
blocks_gdf.head(2)

(45, 237)


Unnamed: 0,nom_ent,nom_mun,nom_loc,pobtot,pobfem,pobmas,p_0a2,p_0a2_f,p_0a2_m,p_3ymas,...,loc,ageb,mza,cvegeo_mun,cvegeo_loc,cvegeo_ageb,cvegeo_mza,ambito,tipomza,geometry
0,Aguascalientes,Aguascalientes,Aguascalientes,184,91.0,93.0,19.0,9.0,10.0,165.0,...,1,3721,37,1001,10010001,100100013721,100100013721037,Urbana,Típica,"POLYGON ((-102.23579 21.91211, -102.23571 21.9..."
1,Aguascalientes,Aguascalientes,Aguascalientes,134,72.0,62.0,7.0,4.0,3.0,127.0,...,1,3721,1,1001,10010001,100100013721,100100013721001,Urbana,Típica,"POLYGON ((-102.23863 21.91033, -102.23852 21.9..."


In [15]:
# Census AGEB record(s) whose cvegeo_ageb matches the example AGEB
ageb_schema, ageb_table = 'censo', 'censo_inegi_20_ageb'
query = f"SELECT * FROM {ageb_schema}.{ageb_table} WHERE \"cvegeo_ageb\" LIKE \'{ageb_test}\'"
# Query the database and tag the result with its CRS in a single chain
ageb_gdf = aup.gdf_from_query(query, geometry_col='geometry').set_crs("EPSG:4326")
# Print the (rows, columns) of the result, then display its first rows
print(ageb_gdf.shape)
ageb_gdf.head(2)

(1, 235)


Unnamed: 0,nom_ent,nom_mun,nom_loc,pobtot,pobfem,pobmas,p_0a2,p_0a2_f,p_0a2_m,p_3ymas,...,entidad,mun,loc,ageb,mza,cvegeo_mun,cvegeo_loc,cvegeo_ageb,cvegeo_mza,geometry
0,Aguascalientes,Aguascalientes,Total AGEB urbana,5275,2676.0,2599.0,400.0,181.0,219.0,4875.0,...,1,1,1,3721,0,1001,10010001,100100013721,100100013721000,"MULTIPOLYGON (((-102.24044 21.91263, -102.2405..."


In [16]:
# Load blocks 800
accesibilidad_urbana = "../../../"
input_path = accesibilidad_urbana + "data/external/census/"
# Read the AGEB key as text. It is compared with the string `ageb_test` below,
# so the match must not depend on pandas type inference (which could turn
# all-digit keys into integers and drop their leading zero).
census_2020_blocks800 = pd.read_csv(
    input_path + "census_2020_blocks800.csv",
    dtype={"cvegeo_ageb": str},
)
# Filter for AGEB test
blocks800_df = census_2020_blocks800.loc[census_2020_blocks800.cvegeo_ageb==ageb_test]

# Show (dimensions and first rows of the full blocks 800 table, not of the filter)
print(census_2020_blocks800.shape)
census_2020_blocks800.head(2)

(8791, 234)


Unnamed: 0,nom_ent,nom_mun,nom_loc,pobtot,pobfem,pobmas,p_0a2,p_0a2_f,p_0a2_m,p_3ymas,...,vph_sintic,entidad,mun,loc,ageb,mza,cvegeo_mun,cvegeo_loc,cvegeo_ageb,cvegeo_mza
0,Aguascalientes,Aguascalientes,Aguascalientes,9,,,,,,,...,,1,1,1,3721,800,1001,10010001,100100013721,100100013721800
1,Aguascalientes,Aguascalientes,Aguascalientes,17,8.0,9.0,0.0,0.0,0.0,17.0,...,0.0,1,1,1,4113,800,1001,10010001,100100014113,100100014113800


In [18]:
# Compare the AGEB total against its blocks, with and without blocks 800.
# The arrows mark the two figures that are meant to be compared.
pop_blocks = blocks_gdf.pobtot.sum()
print(f"Population in blocks: {pop_blocks}.")

pop_ageb = ageb_gdf.pobtot.sum()
print(f"Population in AGEB: {pop_ageb}.<-----")

pop_blocks800 = blocks800_df.pobtot.sum()
print(f"Population in blocks 800: {pop_blocks800}.")

print(f"Sum of blocks + blocks 800: {pop_blocks + pop_blocks800}.<-----")

Population in blocks: 5266.
Population in AGEB: 5275.<-----
Population in blocks 800: 9.
Sum of blocks + blocks 800: 5275.<-----
