# Household and population variables for Mexican municipalities and states in 2020

This notebook downloads household and population data for Mexican municipalities (admin2) from the 2020 Mexican Census, published by [INEGI](https://inegi.org.mx/programas/ccpv/2020/#Datos_abiertos). It then saves the data in compressed Parquet format for downstream analysis.

In [1]:
from io import BytesIO
from pathlib import Path
from urllib.request import urlopen
from zipfile import ZipFile

import pandas as pd

### Download the zip file from the INEGI website

In [2]:
# Direct link to the zip archive that the next cell downloads.
url = 'https://inegi.org.mx/contenidos/programas/ccpv/2020/datosabiertos/iter/iter_00_cpv2020_csv.zip'

In [3]:
# Open the HTTP connection to the archive. An explicit timeout (in seconds, applied to
# each blocking socket operation) keeps a stalled server from hanging the notebook
# forever; without it urlopen falls back to the global default, which is "no timeout".
# The response is left open on purpose: the next cell reads its body.
response = urlopen(url, timeout=60)

### Read zip file and list files

In [4]:
# Wrap the downloaded bytes in an in-memory buffer so ZipFile can open them as a file.
# NOTE: the name `zip` shadows the built-in; it is kept because the extraction cell
# further down reads from this variable.
zip = ZipFile(BytesIO(response.read()))

# List the path of every member stored in the archive.
for member_name in zip.namelist():
    print(member_name)

catalogos/tam_loc.csv.csv
conjunto_de_datos/conjunto_de_datos_iter_00CSV20.csv
diccionario_datos/diccionario_datos_iter_00CSV20.csv
metadatos/metadatos_iter_00_cpv2020.txt


### Extract the file conjunto_de_datos_iter_00CSV20.csv from the zip file

In [5]:
# Read the main data table out of the archive as raw bytes, held in memory.
content = zip.read('conjunto_de_datos/conjunto_de_datos_iter_00CSV20.csv')

### Read the CSV file into a pandas DataFrame

In [6]:
# low_memory=False: parse the file in one pass so each column gets a single consistent
# dtype instead of pandas' chunked, possibly mixed-type inference.
df = pd.read_csv(BytesIO(content), low_memory=False)
# Display only: fillna returns a new frame that is shown as the cell output and then
# discarded, so `df` itself still contains its missing values.
df.fillna('')

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,LONGITUD,LATITUD,ALTITUD,POBTOT,...,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC,TAMLOC
0,0,Total nacional,0,Total nacional,0,Total nacional,,,,126014024,...,30775898,18307193,15211306,6616141,4047100,1788552,3170894,15108204,852871,*
1,0,Total nacional,0,Total nacional,9998,Localidades de una vivienda,,,,250354,...,47005,8385,18981,1732,1113,12775,14143,51293,7154,*
2,0,Total nacional,0,Total nacional,9999,Localidades de dos viviendas,,,,147125,...,25581,5027,11306,971,708,8247,10065,29741,5283,*
3,1,Aguascalientes,0,Total de la entidad Aguascalientes,0,Total de la Entidad,,,,1425607,...,359895,236003,174089,98724,70126,6021,15323,128996,1711,*
4,1,Aguascalientes,0,Total de la entidad Aguascalientes,9998,Localidades de una vivienda,,,,3697,...,732,205,212,48,41,39,62,530,20,*
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195657,32,Zacatecas,58,Santa María de la Paz,37,Mesa Grande,"103°24'52.925"" W","21°33'12.375"" N",1944,165,...,24,9,13,0,0,3,9,38,1,1
195658,32,Zacatecas,58,Santa María de la Paz,39,San Isidro,"103°20'14.164"" W","21°29'11.130"" N",2031,1,...,*,*,*,*,*,*,*,*,*,1
195659,32,Zacatecas,58,Santa María de la Paz,41,San Miguel Tepetitlán,"103°20'09.356"" W","21°30'15.168"" N",1977,97,...,21,12,21,1,1,1,10,19,0,1
195660,32,Zacatecas,58,Santa María de la Paz,42,San Rafael,"103°22'20.134"" W","21°31'39.341"" N",2042,3,...,*,*,*,*,*,*,*,*,*,1


In [7]:
# Preview the first rows of `df` as loaded (no fillna applied).
df.head()

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,LONGITUD,LATITUD,ALTITUD,POBTOT,...,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC,TAMLOC
0,0,Total nacional,0,Total nacional,0,Total nacional,,,,126014024,...,30775898,18307193,15211306,6616141,4047100,1788552,3170894,15108204,852871,*
1,0,Total nacional,0,Total nacional,9998,Localidades de una vivienda,,,,250354,...,47005,8385,18981,1732,1113,12775,14143,51293,7154,*
2,0,Total nacional,0,Total nacional,9999,Localidades de dos viviendas,,,,147125,...,25581,5027,11306,971,708,8247,10065,29741,5283,*
3,1,Aguascalientes,0,Total de la entidad Aguascalientes,0,Total de la Entidad,,,,1425607,...,359895,236003,174089,98724,70126,6021,15323,128996,1711,*
4,1,Aguascalientes,0,Total de la entidad Aguascalientes,9998,Localidades de una vivienda,,,,3697,...,732,205,212,48,41,39,62,530,20,*


### Save the dataframe in the compressed parquet format

In [8]:
# Destination inside the sibling data directory, relative to the notebook's working directory.
output_path = Path('../data/conjunto_de_datos_iter_00CSV20.parquet')
# Create the directory if it is missing (e.g. on a fresh checkout); otherwise the write
# below fails with FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)
# Brotli-compressed Parquet; the default integer index carries no information, so drop it.
df.to_parquet(output_path, compression='brotli', index=False)