# 07 - script22_output_comparison

This notebook __compares the pobtot__ (or any other given pop. col) __for every city__ in metro_gdf_2020 by using:

* f'censo_inegi_{year[2:]}_ageb'
* f'pobvoronoi_inegi_{year[2:]}_mzaageb_mza' #From Script 22
* f'pobvoronoi_inegi_{year[2:]}_mzaageb_node' #From Script 22
* f'pobvoronoi_inegi_{year[2:]}_mzaageb_hex' #From Script 22

__RESULTS:__
* 2010 comparison is a __success__ (Biggest diff is Laguna, -3.39 people)

## Import libraries

In [1]:
main_folder_path = '../../../'

In [2]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

from scipy.spatial import Voronoi, voronoi_plot_2d
import shapely

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

module_path = os.path.abspath(os.path.join(main_folder_path))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup
else:
    import aup

## Notebook config

In [22]:
# Year to analyse
year = '2010'

# --------------- Database
# Cities
metro_schema = 'metropolis'
metro_table = 'metro_gdf_2020'
# Population data
pop_schema = 'sociodemografico'
# Population data - AGEBs
agebs_table = f'censo_inegi_{year[2:]}_ageb'
# Population data - Script 22 blocks
blocks_save_table = f'pobvoronoi_inegi_{year[2:]}_mzaageb_mza'
# Population data - Script 22 nodes
nodes_save_table = f'pobvoronoi_inegi_{year[2:]}_mzaageb_node'
# Population data - Script 22 hexs
hexs_save_table = f'pobvoronoi_inegi_{year[2:]}_mzaageb_hex'

## Load data

### Load all cities

In [5]:
metro_query = f"SELECT * FROM {metro_schema}.{metro_table}"
metro_gdf = aup.gdf_from_query(metro_query, geometry_col='geometry')

# Show
print(metro_gdf.shape)
metro_gdf.head(2)

(367, 6)


Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,NOMGEO,geometry,city
0,1001,1,1,Aguascalientes,"POLYGON ((-102.10641 22.06035, -102.10368 22.0...",Aguascalientes
1,1005,1,5,Jesús María,"POLYGON ((-102.33568 22.05067, -102.33348 22.0...",Aguascalientes


## __2010 comparison__ [Success]

In [15]:
# Prepare for comparison
comparison_df = pd.DataFrame()
i = 0
compare_col = 'pobtot'

# Iterate over each city loading different pop data and adding totals to comparison
city_lst = list(metro_gdf.city.unique()) #['Aguascalientes'] 
for city in city_lst:
    
     # --------------- SELECT CITY
    city_gdf = metro_gdf.loc[metro_gdf.city==city].copy()
    city_gdf = city_gdf.set_crs("EPSG:4326")

    # --------------- LOAD DATA

    # Load city's AGEBs
    print(f"{city} - Loading AGEBs.")
    # Create a tupple from a list with all unique cvegeo_mun ('CVE_ENT'+'CVE_MUN') of current city
    city_gdf['cvegeo_mun'] = city_gdf['CVE_ENT']+city_gdf['CVE_MUN']
    cvegeo_mun_lst = list(city_gdf.cvegeo_mun.unique())
    # To avoid error that happens when there's only one MUN in State: 
    # e.g.: <<< SELECT * FROM sociodemografico.censo_inegi_{year[2:]}_mza WHERE ("entidad" = '02') AND "mun" IN ('001',) >>>
    # Duplicate mun inside tupple if there's only one MUN.
    if len(cvegeo_mun_lst) >= 2:
        cvegeo_mun_tpl = str(tuple(cvegeo_mun_lst))
    else:
        cvegeo_mun_lst.append(cvegeo_mun_lst[0])
        cvegeo_mun_tpl = str(tuple(cvegeo_mun_lst))
    ageb_query = f"SELECT * FROM {pop_schema}.{agebs_table} WHERE \"cvegeo_mun\" IN {cvegeo_mun_tpl}"
    pop_ageb_gdf = aup.gdf_from_query(ageb_query, geometry_col='geometry')
    pop_ageb_gdf = pop_ageb_gdf.set_crs("EPSG:4326")

    # Load Script 22's blocks
    print(f"{city} - Loading Script 22's blocks.")
    blocks_query = f"SELECT * FROM {pop_schema}.{blocks_save_table} WHERE \"city\" LIKE \'{city}\'"
    s22_blocks_gdf = aup.gdf_from_query(blocks_query, geometry_col='geometry')

    # Load Script 22's nodes
    print(f"{city} - Loading Script 22's nodes.")
    nodes_query = f"SELECT * FROM {pop_schema}.{nodes_save_table} WHERE \"city\" LIKE \'{city}\'"
    s22_nodes_gdf = aup.gdf_from_query(nodes_query, geometry_col='geometry')

    # Load Script 22's hexs
    print(f"{city} - Loading Script 22's hexs.")
    hexs_query = f"SELECT * FROM {pop_schema}.{hexs_save_table} WHERE \"city\" LIKE \'{city}\'"
    s22_hexs_gdf = aup.gdf_from_query(hexs_query, geometry_col='geometry')
    s22_hexsres8_gdf = s22_hexs_gdf.loc[s22_hexs_gdf.res==8].copy()
    s22_hexsres9_gdf = s22_hexs_gdf.loc[s22_hexs_gdf.res==9].copy()
    s22_hexsres10_gdf = s22_hexs_gdf.loc[s22_hexs_gdf.res==10].copy()
    del s22_hexs_gdf

    # --------------- REGISTER DATA
    print(f"{city} - Registering Script 22's pop data.")
    comparison_df.loc[i,'city'] = city
    
    agebs_value = pop_ageb_gdf[compare_col].sum()
    comparison_df.loc[i,'agebs'] = agebs_value
    
    blocks_value = pop_ageb_gdf[compare_col].sum()
    comparison_df.loc[i,'blocks'] = blocks_value
    
    nodes_value = pop_ageb_gdf[compare_col].sum()
    comparison_df.loc[i,'nodes'] = nodes_value
    
    hexsres8_value = s22_hexsres8_gdf[compare_col].sum()
    comparison_df.loc[i,'hexsres8'] = hexsres8_value
    
    hexsres9_value = s22_hexsres9_gdf[compare_col].sum()
    comparison_df.loc[i,'hexsres9'] = hexsres9_value
    
    hexsres10_value = s22_hexsres10_gdf[compare_col].sum()
    comparison_df.loc[i,'hexsres10'] = hexsres10_value
    
    i+=1

Aguascalientes - Loading AGEBs.
Aguascalientes - Loading Script 22's blocks.
Aguascalientes - Loading Script 22's nodes.
Aguascalientes - Loading Script 22's hexs.
Aguascalientes - Registering Script 22's pop data.
Ensenada - Loading AGEBs.
Ensenada - Loading Script 22's blocks.
Ensenada - Loading Script 22's nodes.
Ensenada - Loading Script 22's hexs.
Ensenada - Registering Script 22's pop data.
Mexicali - Loading AGEBs.
Mexicali - Loading Script 22's blocks.
Mexicali - Loading Script 22's nodes.
Mexicali - Loading Script 22's hexs.
Mexicali - Registering Script 22's pop data.
Tijuana - Loading AGEBs.
Tijuana - Loading Script 22's blocks.
Tijuana - Loading Script 22's nodes.
Tijuana - Loading Script 22's hexs.
Tijuana - Registering Script 22's pop data.
La Paz - Loading AGEBs.
La Paz - Loading Script 22's blocks.
La Paz - Loading Script 22's nodes.
La Paz - Loading Script 22's hexs.
La Paz - Registering Script 22's pop data.
Los Cabos - Loading AGEBs.
Los Cabos - Loading Script 22's b

In [20]:
comparison_df['agebs-blocks'] = round(comparison_df['agebs']-comparison_df['blocks'],2)
comparison_df['agebs-nodes'] = round(comparison_df['agebs']-comparison_df['nodes'],2)
comparison_df['agebs-hexsres8'] = round(comparison_df['agebs']-comparison_df['hexsres8'],2)
comparison_df['agebs-hexsres9'] = round(comparison_df['agebs']-comparison_df['hexsres9'],2)
comparison_df['agebs-hexsres10'] = round(comparison_df['agebs']-comparison_df['hexsres10'],2)

# Show first 35
comparison_df.head(35)

Unnamed: 0,city,agebs,blocks,nodes,hexsres8,hexsres9,hexsres10,agebs-blocks,agebs-nodes,agebs-hexsres8,agebs-hexsres9,agebs-hexsres10
0,Aguascalientes,828378.0,828378.0,828378.0,828378.0,828378.0,828378.0,0.0,0.0,0.04,0.04,0.04
1,Ensenada,398122.0,398122.0,398122.0,398122.0,398122.0,398122.0,0.0,0.0,-0.03,-0.03,-0.03
2,Mexicali,838116.0,838116.0,838116.0,838115.8,838115.8,838115.8,0.0,0.0,0.24,0.24,0.24
3,Tijuana,1595559.0,1595559.0,1595559.0,1595559.0,1595559.0,1595559.0,0.0,0.0,-0.0,-0.01,-0.01
4,La Paz,225022.0,225022.0,225022.0,225022.0,225022.0,225022.0,0.0,0.0,-0.02,-0.02,-0.02
5,Los Cabos,215545.0,215545.0,215545.0,215545.0,215545.0,215545.0,0.0,0.0,0.02,0.02,0.02
6,Campeche,233864.0,233864.0,233864.0,233864.0,233864.0,233864.0,0.0,0.0,-0.03,-0.03,-0.03
7,Laguna,1066672.0,1066672.0,1066672.0,1066675.0,1066675.0,1066675.0,0.0,0.0,-3.39,-3.38,-3.38
8,Monclova,330608.0,330608.0,330608.0,330608.0,330608.0,330608.0,0.0,0.0,-0.01,-0.01,-0.01
9,Piedras Negras,177291.0,177291.0,177291.0,177291.0,177291.0,177291.0,0.0,0.0,0.05,0.05,0.05


In [21]:
# Show last 36
comparison_df.tail(36)

Unnamed: 0,city,agebs,blocks,nodes,hexsres8,hexsres9,hexsres10,agebs-blocks,agebs-nodes,agebs-hexsres8,agebs-hexsres9,agebs-hexsres10
35,Cuautla,249448.0,249448.0,249448.0,249448.0,249448.0,249448.0,0.0,0.0,0.0,0.0,0.0
36,Cuernavaca,884145.0,884145.0,884145.0,884145.0,884145.0,884145.0,0.0,0.0,-0.0,-0.0,-0.0
37,Tepic,383232.0,383232.0,383232.0,383232.0,383232.0,383232.0,0.0,0.0,-0.0,-0.0,-0.0
38,Monterrey,4131710.0,4131710.0,4131710.0,4131710.0,4131710.0,4131710.0,0.0,0.0,-0.03,-0.04,-0.04
39,Oaxaca,586495.0,586495.0,586495.0,586495.0,586495.0,586495.0,0.0,0.0,0.0,0.01,0.0
40,Puebla,2290801.0,2290801.0,2290801.0,2290801.0,2290801.0,2290801.0,0.0,0.0,-0.02,-0.03,-0.03
41,San Martin,232728.0,232728.0,232728.0,232728.0,232728.0,232728.0,0.0,0.0,-0.01,-0.01,-0.01
42,Tehuacan,277745.0,277745.0,277745.0,277745.0,277745.0,277745.0,0.0,0.0,-0.01,-0.01,-0.01
43,Queretaro,920993.0,920993.0,920993.0,920993.0,920993.0,920993.0,0.0,0.0,0.02,0.01,0.01
44,Cancun,674206.0,674206.0,674206.0,674206.0,674206.0,674206.0,0.0,0.0,0.03,0.03,0.02
