In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

from shapely import wkt

import matplotlib.pyplot as plt
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Add the directory three levels above the notebook to the import path
# (presumably where the project-local `aup` package lives; verify against repo layout).
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the path is already on sys.path (e.g. set via
# PYTHONPATH) the branch above is skipped, and `aup` must still be imported.
import aup



## Part 1: Create a new GeoDataFrame table in the database containing population data for each city.

In [2]:
def main(city, cvegeo_list, save = True):
    """Merge a city's proximity hexagons with census age-group data and upload the result.

    Steps:
      1. Download the rows of ``prox_analysis.cd_cuidadoras_hexres8`` whose
         ``city`` matches ``city``.
      2. Download the rows of ``censo.hex_bins_pop_2020`` whose ``CVEGEO``
         starts with each code in ``cvegeo_list``.
      3. Keep ``hex_id_8``, ``p_0a2`` and ``p_3a5`` from the census data
         (renamed to ``pob_0a2`` / ``pob_3a5``), add ``pob_0a5`` as their sum,
         and inner-merge onto the proximity data by ``hex_id_8``.
      4. Add ``res = 8``, rename ``hex_id_8`` to ``hex_id``, reorder columns and,
         if ``save`` is true, append the result to
         ``prox_analysis.cdcuidadoras_popageb_2020_hex``.

    Parameters
    ----------
    city : str
        City name used in the ``LIKE`` filter of the proximity table.
    cvegeo_list : list of str
        CVEGEO prefixes used to select the census hexagons for the city.
    save : bool, default True
        When True, upload the merged table to the database.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``cvegeo_list`` is empty (there would be no census data to merge).
    """
    # Fail early with a clear message instead of an opaque AttributeError later on.
    if len(cvegeo_list) == 0:
        raise ValueError(f'No CVEGEO codes were provided for {city}.')

    print(f'STARTING ANALYSIS FOR {city}.')
    print(f'{city} con {len(cvegeo_list)} cvegeos:')
    print(cvegeo_list)

    #--------------- DOWNLOAD DATA ---------------
    # Download cd_cuidadoras for city
    # NOTE(review): the query is built by string interpolation; a city name
    # containing a single quote would break it. Values come from the database.
    schema = 'prox_analysis'
    table = 'cd_cuidadoras_hexres8'
    query = f"SELECT * FROM {schema}.{table}  WHERE \"city\" LIKE \'{city}\'"
    cd_cuidadoras = aup.gdf_from_query(query, geometry_col='geometry')

    pob_tot = cd_cuidadoras.pobtot.sum()

    print(f'Downloaded cd_cuidadoras data with a total of {pob_tot} persons.')

    # Download hex_pop for city
    schema = 'censo'
    table = 'hex_bins_pop_2020'

    # Collect one frame per CVEGEO and concatenate once at the end; growing a
    # frame with concat inside the loop copies all previous rows every iteration.
    hex_frames = []
    for i, cvegeo in enumerate(cvegeo_list, start=1):
        print(f'Downloading pop data for CVEGEO {cvegeo}, {i} of {len(cvegeo_list)}.')

        query = f"SELECT * FROM {schema}.{table} WHERE \"CVEGEO\" LIKE \'{cvegeo}%%\'"
        hex_frames.append(aup.gdf_from_query(query, geometry_col='geometry'))

    hex_pop = pd.concat(hex_frames, ignore_index=True, axis=0)

    # Free memory
    del hex_frames

    pob_tot = hex_pop.pobtot.sum()

    print(f'Downloaded hex_pop data for all cvegeos, with a total of {pob_tot} persons.')

    #--------------- PROCESS DATA ---------------
    # Filter for data of interest, rename, and calculate the combined age group.
    # rename/assign return new objects, so nothing is written into a slice of
    # hex_pop (avoids SettingWithCopy problems).
    hex_pop_f = (
        hex_pop[['hex_id_8', 'p_0a2', 'p_3a5']]
        .rename(columns={'p_0a2': 'pob_0a2', 'p_3a5': 'pob_3a5'})
        .assign(pob_0a5=lambda df: df['pob_0a2'] + df['pob_3a5'])
    )

    # Free memory
    del hex_pop

    # Merge data (inner join: only hexagons present in both tables are kept)
    hex_cdcuidadoras_pop = pd.merge(cd_cuidadoras, hex_pop_f, on='hex_id_8')

    # Free memory
    del cd_cuidadoras
    del hex_pop_f

    # Add res data and remove res from hexid
    hex_cdcuidadoras_pop['res'] = 8
    hex_cdcuidadoras_pop = hex_cdcuidadoras_pop.rename(columns={'hex_id_8': 'hex_id'})

    # Reorder columns
    reordered_list = ['hex_id', 'res', 'geometry',
                      'max_preescolar',
                      'max_primaria',
                      'max_secundaria',
                      'max_salud',
                      'max_guarderias',
                      'max_alimentos',
                      'max_personal',
                      'max_parques',
                      'max_idx_15_min',
                      'pobtot',
                      'pobfem',
                      'pobmas',
                      'pob_0a2',
                      'pob_3a5',
                      'pob_0a5',
                      'pob_0a14',
                      'pob_15a24',
                      'pob_25a59',
                      'p_60ymas',
                      'dens_pobha',
                      'city']

    hex_cdcuidadoras_pop = hex_cdcuidadoras_pop[reordered_list]

    #--------------- UPLOAD DATA ---------------
    if save:
        aup.gdf_to_db_slow(hex_cdcuidadoras_pop, "cdcuidadoras_popageb_2020_hex", 'prox_analysis', if_exists='append')
        print(f'Uploaded {city} data to db')
    print('--'*20)

In [4]:
# Load the municipalities table; its `city` and `CVEGEO` columns drive the loop below.
mun_schema, mun_table = 'metropolis', 'metro_gdf'
query = f"SELECT * FROM {mun_schema}.{mun_table}"
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Cities that already have rows in the output table are considered processed.
prox_schema, prox_table = 'prox_analysis', 'cdcuidadoras_popageb_2020_hex'
query = f"SELECT * FROM {prox_schema}.{prox_table}"
prox_all = aup.gdf_from_query(query, geometry_col='geometry')
processed_city_list = list(prox_all.city.unique())

# Optional: uncomment to skip ZMVM due to its size.
#processed_city_list.append('ZMVM')

print(f"Already processed {len(processed_city_list)} cities:")
print(processed_city_list)

# Run main() for every city that has not been uploaded yet.
for city in mun_gdf.city.unique():
    if city in processed_city_list:
        continue
    cvegeo_list = list(mun_gdf.loc[mun_gdf.city == city, "CVEGEO"].unique())
    main(city, cvegeo_list, save=True)

Already processed 73 cities:
['Aguascalientes', 'Ensenada', 'Mexicali', 'Tijuana', 'La Paz', 'Campeche', 'Laguna', 'Monclova', 'Piedras Negras', 'Saltillo', 'Colima', 'Tecoman', 'Tapachula', 'Tuxtla', 'Chihuahua', 'Delicias', 'Juarez', 'Parral', 'Durango', 'Celaya', 'Guanajuato', 'Leon', 'Moroleon', 'San Francisco', 'Acapulco', 'Chilpancingo', 'Pachuca', 'Tula', 'Tulancingo', 'Guadalajara', 'Ocotlan', 'Vallarta', 'Tianguistenco', 'Toluca', 'Piedad', 'Morelia', 'Zamora', 'Cuautla', 'Cuernavaca', 'Tepic', 'Monterrey', 'Oaxaca', 'Tehuantepec', 'Puebla', 'Tehuacan', 'Teziutlan', 'Queretaro', 'Cancun', 'Chetumal', 'Rio Verde', 'SLP', 'Culiacan', 'Mazatlan', 'Guaymas', 'Hermosillo', 'Nogales', 'Villahermosa', 'Victoria', 'Matamoros', 'Nuevo Laredo', 'Reynosa', 'Tampico', 'Tlaxcala', 'Acayucan', 'Coatzacoalcos', 'Cordoba', 'Minatitlan', 'Orizaba', 'Poza Rica', 'Veracruz', 'Xalapa', 'Merida', 'Zacatecas']
STARTING ANALYSIS FOR ZMVM.
ZMVM con 75 cvegeos:
['09002', '09003', '09004', '09005', '09

## Part 2: Analyse the data by proximity category

In [10]:
# Summarise, for every city, the population aged 0-5 (column `pob_0a5`) that
# falls in each proximity-time category of `max_idx_15_min`.
# Result: `pop_data`, one row per city, one column per category plus checks.

#Load mun data
mun_schema = 'metropolis'
mun_table = 'metro_gdf'
query = f"SELECT * FROM {mun_schema}.{mun_table}" 
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Proximity table produced in Part 1 (hexagons at resolution 8).
prox_schema = 'prox_analysis'
prox_table = 'cdcuidadoras_popageb_2020_hex'

# Time categories, right-closed: <=15, (15, 30], (30, 45], (45, 60], >60.
categories = ['0a15', '15a30', '30a45', '45a60', '+60']
category_bins = [-np.inf, 15, 30, 45, 60, np.inf]

#Run main process
# Collect one summary row per city and concatenate once after the loop
# (concatenating inside the loop re-copies all previous rows each time).
city_summaries = []

for city in mun_gdf.city.unique():

    print(f'STARTING ANALYSIS FOR {city}')

    #--------------- DOWNLOAD DATA ---------------
    # Load proximity hexres8 for city
    query = f"SELECT * FROM {prox_schema}.{prox_table} WHERE \"city\" LIKE \'{city}\'"
    prox_city = aup.gdf_from_query(query, geometry_col='geometry')

    #--------------- PROCESS DATA ---------------
    # Create time categories as an ordered categorical in one step. pd.cut
    # avoids initialising a float NaN column and then writing strings into it
    # (an incompatible-dtype assignment that newer pandas deprecates).
    # Rows with a missing max_idx_15_min stay uncategorised (NaN).
    prox_city['prox_cat'] = pd.cut(prox_city['max_idx_15_min'],
                                   bins=category_bins,
                                   labels=categories,
                                   right=True)

    # Group by pop data
    pob_0a5_tot = prox_city.pob_0a5.sum()
    print(f'Total pob_0a5 in {city}: {pob_0a5_tot}.')

    # observed=False keeps categories with no hexagons (sum 0), so every
    # category column exists below regardless of the pandas default.
    pobtot_summary = prox_city.groupby('prox_cat', observed=False).agg({'pob_0a5': 'sum'})
    pobtot_transposed = pobtot_summary.rename(columns={'pob_0a5': city}).transpose()

    # Consistency check: the per-category totals should add up to the city total;
    # `diff` is non-zero when rows are uncategorised or due to float rounding.
    pobtot_transposed['hexpop_total'] = pob_0a5_tot
    pobtot_transposed['cat_total'] = sum(pobtot_transposed[cat] for cat in categories)
    pobtot_transposed['diff'] = pobtot_transposed['hexpop_total'] - pobtot_transposed['cat_total']

    city_summaries.append(pobtot_transposed)

    print(f'ANALIZED DATA FOR {city}')

# An empty city list yields an empty frame, as before.
pop_data = pd.concat(city_summaries) if city_summaries else pd.DataFrame()

STARTING ANALYSIS FOR Aguascalientes
Total pob_0a5 in Aguascalientes: 104671.77065957601.
ANALIZED DATA FOR Aguascalientes
STARTING ANALYSIS FOR Ensenada
Total pob_0a5 in Ensenada: 34588.64926271.
ANALIZED DATA FOR Ensenada
STARTING ANALYSIS FOR Mexicali
Total pob_0a5 in Mexicali: 73747.36127354001.
ANALIZED DATA FOR Mexicali
STARTING ANALYSIS FOR Tijuana
Total pob_0a5 in Tijuana: 179720.14984799002.
ANALIZED DATA FOR Tijuana
STARTING ANALYSIS FOR La Paz
Total pob_0a5 in La Paz: 21486.807822659997.
ANALIZED DATA FOR La Paz
STARTING ANALYSIS FOR Campeche
Total pob_0a5 in Campeche: 22970.59942866.
ANALIZED DATA FOR Campeche
STARTING ANALYSIS FOR Laguna
Total pob_0a5 in Laguna: 128634.57215102.
ANALIZED DATA FOR Laguna
STARTING ANALYSIS FOR Monclova
Total pob_0a5 in Monclova: 38671.71460819.
ANALIZED DATA FOR Monclova
STARTING ANALYSIS FOR Piedras Negras
Total pob_0a5 in Piedras Negras: 22519.508575554.
ANALIZED DATA FOR Piedras Negras
STARTING ANALYSIS FOR Saltillo
Total pob_0a5 in Salti

In [12]:
# Show up to the first 74 city rows of the summary table.
pop_data.iloc[:74]

prox_cat,0a15,15a30,30a45,45a60,+60,hexpop_total,cat_total,diff
Aguascalientes,22418.905235,42654.005616,23172.847004,9832.948959,6593.063845,104671.770660,104671.770660,0.000000e+00
Ensenada,5794.218179,10888.917948,9074.294967,3837.561425,4993.656744,34588.649263,34588.649263,0.000000e+00
Mexicali,9242.608148,37104.569663,16017.569776,4611.115453,6771.498234,73747.361274,73747.361274,0.000000e+00
Tijuana,8161.645690,60723.610009,50884.396869,23715.810322,36234.686958,179720.149848,179720.149848,2.910383e-11
La Paz,2130.388570,9383.840800,6141.442438,1594.179854,2236.956160,21486.807823,21486.807823,-3.637979e-12
...,...,...,...,...,...,...,...,...
Poza Rica,7420.801649,9252.608600,4179.864352,1755.446248,3561.279425,26170.000274,26170.000274,0.000000e+00
Veracruz,14928.805967,26700.618658,7550.570806,3574.591879,3902.439429,56657.026739,56657.026739,0.000000e+00
Xalapa,16935.503234,20150.999836,8695.492385,1956.426727,3423.585798,51162.007980,51162.007980,0.000000e+00
Merida,15698.690275,35411.064909,23577.812372,13585.038543,12139.393686,100411.999784,100411.999784,0.000000e+00


In [14]:
# Export only the per-category population columns; the city names in the index
# are written as the first CSV column.
output = pop_data.loc[:, ['0a15', '15a30', '30a45', '45a60', '+60']]
output.to_csv(path_or_buf='../../../data/external/temporal_fromjupyter/popdata_cdcuidado.csv')