In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import scipy
import geopandas as gpd
import xarray as xr
import sparse
from itertools import product, combinations
from pathlib import Path
from pprint import pprint
import matplotlib.pyplot as plt
import matplotlib
from collections import defaultdict

# Show up to 500 rows when a frame is displayed; the column display limit
# is left at the pandas default.
pd.set_option('display.max_rows', 500)

import sys

# Make the project's local modules (imported below) importable.
# Guarded so that re-running this cell does not keep appending the same
# entry to sys.path.
if '../src/' not in sys.path:
    sys.path.append('../src/')

from extended_survey import process_people_df, process_places_df, categorize_p, categorize_v
from census import process_census
from constraints import get_ind_const, get_viv_const
from misc import mun_dict, met_zone
from taz import load_marco_geo, merge_mg_taz, plot_taz_mg, plot_taz_mg_unass, plot_taz_empty_mg, plot_taz_codes, plot_chull

# Input/output locations, all relative to the notebook's working directory.
# The two survey CSVs are derived from survey_dir so the directory is
# spelled out only once.
survey_dir = Path('../data/cuestionario_ampliado/Censo2020_CA_nl_csv/')
personas_path = survey_dir / 'Personas19.CSV'
viviendas_path = survey_dir / 'Viviendas19.CSV'
census_iter_path = Path('../data/census_loc/ITER_19CSV20.csv')
census_resageburb_path = Path('../data/census_ageb_manz/RESAGEBURB_19CSV20.csv')
output_path = Path('../output/')

In [2]:
# Load the census tables from the ITER and RESAGEBURB CSVs.
# NOTE(review): judging by the names, the five results are municipality,
# locality and AGEB level frames (plus min/max AGEB variants) -- confirm
# against census.process_census.
df_mun, df_loc, df_agebs, df_agebs_min, df_agebs_max = process_census(
    census_iter_path, census_resageburb_path
)

In [3]:
# Geostatistical framework layer, enriched by load_marco_geo with the
# census frames loaded above.
marco_geo_path = Path('../data/19_nuevoleon.gpkg')
mg = load_marco_geo(marco_geo_path, df_mun, df_loc, df_agebs_min)

# NOTE(review): absolute, machine-specific directory. Move the TAZ shapefiles
# under ../data and point taz_dir there so the notebook runs elsewhere.
taz_dir = Path('/Users/gperaza/Downloads/TAZ')

# Both TAZ layers are reprojected to the CRS of `mg` so they can be overlaid.
taz = gpd.read_file(taz_dir / 'Zonas.shp').to_crs(mg.crs).drop(columns='ESTADO')
mtaz = gpd.read_file(taz_dir / 'Macrozonas.shp').to_crs(mg.crs)

In [4]:
from matplotlib.backends.backend_pdf import PdfPages
import warnings

In [5]:
# For every municipality in met_zone: keep its TAZ polygons (indexed by ZONA),
# compute the marco-geo/TAZ overlay, and add one 2x2 diagnostic page to a
# single multi-page PDF.
overlay_dict = {}
overlay_dropped_dict = {}
taz_dict = {}
with warnings.catch_warnings():
    # NOTE(review): every warning raised inside the loop is silenced; narrow
    # the filter once the specific noisy warning is identified.
    warnings.simplefilter('ignore')
    with PdfPages('multipage_pdf.pdf') as pdf:
        for mun in met_zone:
            taz_mun = taz[taz.MUNICIPIO == mun].copy()
            taz_dict[mun] = taz_mun.set_index('ZONA').sort_index()
            # NOTE(review): the full `taz` layer (not `taz_mun`) is passed
            # here; presumably merge_mg_taz selects the municipality itself
            # from `mun` -- confirm.
            overlay, overlay_dropped = merge_mg_taz(mun, taz, mg, mun_dict)
            overlay_dict[mun] = overlay
            overlay_dropped_dict[mun] = overlay_dropped

            fig, ax = plt.subplots(2, 2, figsize=(20, 20))
            try:
                plot_taz_mg(overlay, taz_mun, title=mun, ax=ax[0, 0])
                plot_taz_mg_unass(overlay, taz_mun, title='Unassigned AGEBS', ax=ax[0, 1])
                plot_taz_empty_mg(overlay, taz_mun, title='Empty TAZs', ax=ax[1, 0])
                plot_chull(taz_mun, title='Multipol TAZs', ax=ax[1, 1])

                # Save this exact figure rather than whatever pyplot
                # considers the "current" one.
                pdf.savefig(fig)
            finally:
                # Always release the (large) figure, even if a plot fails.
                plt.close(fig)

Monterrey LOC  AGEB
1    5513    0.272577
     5566    0.011953
     5551    0.010990
     5585    0.010577
Name: ratio, dtype: float64
Guadalupe LOC  AGEB
1    0995    0.479441
     1264    0.378678
     2686    0.339026
     2898    0.022894
Name: ratio, dtype: float64
Apodaca LOC  AGEB
1    2946    0.485896
     2787    0.252243
     3060    0.051779
174  016A    0.016672
1    2984    0.014885
Name: ratio, dtype: float64
San Nicolás de los Garza Series([], Name: ratio, dtype: float64)
General Escobedo Series([], Name: ratio, dtype: float64)
Santa Catarina LOC  AGEB
1    0761    0.32946
Name: ratio, dtype: float64
Juárez LOC  AGEB
1    1546    0.456507
45   1442    0.448428
224  1495    0.171497
1    1550    0.116918
45   2099    0.030313
1    2046    0.020368
Name: ratio, dtype: float64
García LOC  AGEB
236  1807    0.482288
1    1559    0.476531
     1313    0.405824
105  1351    0.392891
1    0372    0.343929
     1474    0.243603
236  1826    0.237855
25   1648    0.185779
1    1

In [6]:
# Attach per-TAZ census totals: sum the census columns of each overlay by
# ZONA and outer-join them onto that municipality's TAZ frame.
census_cols = list(df_agebs_min.columns)

# NOTE(review): -10 is not a real TAZ id; presumably it is the ZONA label
# given to AGEBs that could not be assigned to any TAZ -- confirm in
# taz.merge_mg_taz.
extra_zone = -10

for mun, ovr in overlay_dict.items():
    zone_totals = ovr.groupby('ZONA')[census_cols].sum().astype(int)
    # Drop census columns left by a previous run of this cell so the join
    # does not fail on overlapping column names when the cell is re-executed.
    taz_base = taz_dict[mun].drop(columns=census_cols, errors='ignore')
    taz_dict[mun] = taz_base.join(zone_totals, how='outer')
    assert taz_dict[mun].index.is_unique, f'duplicated ZONA index for {mun}'

# Sanity check: apart from the extra zone label, the joined frames must hold
# exactly the TAZ rows that have no missing values.
# errors='ignore' keeps the check meaningful when no frame contains the label.
n_joined = len(pd.concat(taz_dict.values()).drop(extra_zone, errors='ignore'))
n_expected = len(taz.dropna())
assert n_joined == n_expected, (
    f'joined TAZ rows ({n_joined}) != complete TAZ rows ({n_expected})'
)

In [8]:
# Write every municipality's TAZ frame as its own layer of one GeoPackage.
for layer_name, layer_gdf in taz_dict.items():
    layer_gdf.to_file("taz_census.gpkg", driver="GPKG", layer=layer_name)