In [6]:
import os
import tempfile
import zipfile
from pathlib import Path

import geopandas as gpd
import pandas as pd

In [7]:
# Root directory of the project data, taken from the DATA_PATH environment
# variable. Raises KeyError if the variable is not set. The loading cell below
# reads its inputs from the "raws" sub-directory of this root.
data_path = Path(os.environ["DATA_PATH"])

In [10]:
# Load the 2020 block geometries and census counts for a single municipality.
#
# Both sources are keyed by CVEGEO, built here as the zero-padded concatenation
# ENTIDAD(2) + MUN(3) + LOC(4) + AGEB(4) + MZA(3). The same prefix filter is
# applied to both so that their indexes can be aligned in the next cell.
CVEGEO_PREFIX = "02002"  # ENTIDAD "02" followed by MUN "002"
CENSUS_KEY_COLS = ["ENTIDAD", "MUN", "LOC", "AGEB", "MZA"]
CENSUS_VALUE_COLS = ["POBTOT", "TVIVPARHAB"]

with (
    zipfile.ZipFile(
        data_path / "raws" / "2020" / "geom" / "02_bajacalifornia.zip",
    ) as zf,
    tempfile.TemporaryDirectory() as tmpdir,
):
    # The archive is unpacked into a temporary directory that is deleted when
    # the `with` block exits, so the read has to happen inside the block.
    zf.extractall(tmpdir)
    extracted_path = Path(tmpdir) / "02_bajacalifornia"
    # Result: the "geometry" column indexed by CVEGEO (not a full table).
    df_blocks = (
        gpd.read_file(extracted_path / "02m.shp")
        # Plain boolean mask instead of a query string so the prefix constant
        # is shared with the census filter; na=False drops rows with no key.
        .loc[lambda df: df["CVEGEO"].str.startswith(CVEGEO_PREFIX, na=False)]
        .set_index("CVEGEO")["geometry"]
    )

with (
    zipfile.ZipFile(
        data_path / "raws" / "2020" / "census" / "ageb_mza_urbana_02_cpv2020_csv.zip",
    ) as zf,
    tempfile.TemporaryDirectory() as tmpdir,
):
    zf.extractall(tmpdir)
    extracted_path = Path(tmpdir) / "ageb_mza_urbana_02_cpv2020"
    df_census = (
        pd.read_csv(
            extracted_path
            / "conjunto_de_datos"
            / "conjunto_de_datos_ageb_urbana_02_cpv2020.csv",
            # Only the key parts and the two indicators are used downstream.
            usecols=CENSUS_KEY_COLS + CENSUS_VALUE_COLS,
            # Key parts are codes, not numbers (AGEB can contain letters), so
            # read them as text instead of letting pandas infer mixed dtypes.
            dtype={col: str for col in CENSUS_KEY_COLS},
            # NOTE(review): the value columns keep whatever dtype read_csv
            # infers; confirm they hold no non-numeric placeholders before
            # doing arithmetic on them.
        )
        .assign(
            CVEGEO=lambda df: (
                df["ENTIDAD"].str.zfill(2)
                + df["MUN"].str.zfill(3)
                + df["LOC"].str.zfill(4)
                + df["AGEB"].str.zfill(4)
                + df["MZA"].str.zfill(3)
            ),
        )
        # na=False also discards rows whose key could not be built because a
        # key part was missing.
        .loc[lambda df: df["CVEGEO"].str.startswith(CVEGEO_PREFIX, na=False)]
        .set_index("CVEGEO")[CENSUS_VALUE_COLS]
    )

# Read directly from the zip archive. Presumably the metropolitan-area layer
# (going by the file name); it is not referenced by the other cells visible
# in this part of the notebook.
df_metropoli = gpd.read_file(data_path / "raws" / "metropolis_2020.zip")

In [11]:
# Align the census counts and the block geometries on their shared CVEGEO
# index. A column-wise concat keeps the union of both indexes, so keys that
# end up without a geometry are dropped before building the GeoDataFrame.
census_and_geometry = pd.concat([df_census, df_blocks], axis=1)
rows_with_geometry = census_and_geometry.dropna(subset=["geometry"])
df_out = gpd.GeoDataFrame(
    rows_with_geometry,
    geometry="geometry",
    crs=df_blocks.crs,  # reuse the CRS of the source block layer
)
# Export next to the notebook for inspection.
df_out.to_file("./test.gpkg")

In [5]:
# Display the joined table.
# NOTE(review): the saved output under this cell lists CVEGEO keys starting
# with "08037", while the loading cell keeps only keys starting with "02002",
# and this cell's execution count (5) is lower than that of the cells that
# build df_out. The output looks stale; restart the kernel and run all cells.
df_out

Unnamed: 0_level_0,POBTOT,TVIVPARHAB,geometry
CVEGEO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0803700010401001,0.0,0,"POLYGON ((2075331.571 2194416.017, 2075331.182..."
0803700010401002,0.0,0,"POLYGON ((2075400.572 2194290.152, 2075479.8 2..."
0803700010401003,0.0,0,"POLYGON ((2075234.524 2194421.021, 2075234.394..."
0803700010401004,0.0,0,"POLYGON ((2075950.178 2194417.621, 2075946.995..."
0803700010401005,0.0,0,"POLYGON ((2075933.756 2194405.716, 2075930.73 ..."
...,...,...,...
0803706133092259,,,"POLYGON ((2089290.984 2175349.75, 2089358.94 2..."
0803706333177065,,,"POLYGON ((2071514.925 2148334.986, 2071559.103..."
0803706333177072,,,"POLYGON ((2071494.8 2148256.556, 2071514.524 2..."
0803706333177078,,,"POLYGON ((2071694.019 2148002.852, 2071742.484..."


In [11]:
# Display the block geometries.
# NOTE(review): the loading cell assigns df_blocks as the "geometry" column
# indexed by CVEGEO and filtered to one municipality, but the saved output
# under this cell is a full table with a positional index and several CVE_MUN
# values. The output appears to predate the current code; re-run to refresh.
df_blocks

Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,geometry
0,0200100019602020,02,001,0001,9602,020,Urbana,Típica,"POLYGON ((1113275.198 2264580.962, 1113270.756..."
1,0200105206519122,02,001,0520,6519,122,Rural,Típica,"POLYGON ((1177646.465 2235944.288, 1177698.997..."
2,020010001806A009,02,001,0001,806A,009,Urbana,Típica,"POLYGON ((1112936.822 2261844.96, 1112890.546 ..."
3,0200100018445011,02,001,0001,8445,011,Urbana,Típica,"POLYGON ((1116386.529 2272022.76, 1116364.41 2..."
4,0200100010736051,02,001,0001,0736,051,Urbana,Típica,"POLYGON ((1112565.857 2270573.124, 1112645.946..."
...,...,...,...,...,...,...,...,...,...
65683,0200600681073018,02,006,0068,1073,018,Rural,Típica,"POLYGON ((1270247.443 2017236.945, 1270281.627..."
65684,0200600681073019,02,006,0068,1073,019,Rural,Típica,"POLYGON ((1270285.457 2017263.62, 1270311.229 ..."
65685,0200600681073020,02,006,0068,1073,020,Rural,Típica,"POLYGON ((1270316.328 2017290.796, 1270340.845..."
65686,0200608611020064,02,006,0861,1020,064,Rural,Típica,"POLYGON ((1164808.454 2101272.124, 1164912.124..."
