In [3]:
import numpy as np
import pandas as pd
import geopandas as gdp
import keplergl
from dms2dec.dms_convert import dms2dec

# Leer los datos del CENSO 2020 en Edomex

In [4]:
# Load the 2020 census table (urban AGEB / block level) for the State of Mexico.
ruta_ageb_urbana = 'data/ageb_mza_urbana_15_cpv2020/conjunto_de_datos/conjunto_de_datos_ageb_urbana_15_cpv2020.csv'
df_edomex = pd.read_csv(ruta_ageb_urbana)
df_edomex

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,AGEB,MZA,POBTOT,POBFEM,...,VPH_TELEF,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC
0,15,México,0,Total de la entidad México,0,Total de la entidad,0000,0,16992418,8741123,...,2123211,4049493,2561525,1559564,822048,572663,133777,329132,1769551,53857
1,15,México,1,Acambay de Ruíz Castañeda,0,Total del municipio,0000,0,67872,35255,...,1954,13388,2973,2140,393,257,1352,3427,13523,734
2,15,México,1,Acambay de Ruíz Castañeda,1,Total de la localidad urbana,0000,0,5988,3148,...,561,1441,854,677,226,118,55,75,573,10
3,15,México,1,Acambay de Ruíz Castañeda,1,Total AGEB urbana,0127,0,3373,1796,...,360,831,523,427,143,80,33,45,309,5
4,15,México,1,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,0127,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141525,15,México,125,Tonanitla,7,Villas de Tonanitla,0069,44,11,5,...,*,4,5,4,3,4,*,0,0,0
141526,15,México,125,Tonanitla,7,Villas de Tonanitla,0069,45,19,12,...,*,5,5,3,5,*,0,0,0,0
141527,15,México,125,Tonanitla,7,Villas de Tonanitla,0069,46,3,*,...,*,*,*,*,*,*,*,*,*,*
141528,15,México,125,Tonanitla,7,Villas de Tonanitla,0069,47,12,7,...,0,4,*,*,0,0,0,0,*,0


# Limpieza de datos

## Limpiamos datos perdidos marcados como '*'

In [5]:
# Keep only the rows whose single-population column (P12YM_SOLT) is not masked
# with '*', and only the columns needed later for the join and the map.
# A single .loc call followed by .copy() yields an independent frame, so the
# cleaning cells below modify this frame itself rather than a slice derived
# from df_edomex (which can trigger SettingWithCopyWarning).
columnas_solt = ['NOM_MUN', 'LOC', 'NOM_LOC', 'P_12YMAS_F', 'P_12YMAS_M', 'P12YM_SOLT']
df_edomex_solt_mun = df_edomex.loc[df_edomex['P12YM_SOLT'] != '*', columnas_solt].copy()
df_edomex_solt_mun

Unnamed: 0,NOM_MUN,LOC,NOM_LOC,P_12YMAS_F,P_12YMAS_M,P12YM_SOLT
0,Total de la entidad México,0,Total de la entidad,7137483,6605364,4799091
1,Acambay de Ruíz Castañeda,0,Total del municipio,27342,24517,18101
2,Acambay de Ruíz Castañeda,1,Total de la localidad urbana,2568,2182,1780
3,Acambay de Ruíz Castañeda,1,Total AGEB urbana,1488,1207,1029
4,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,0,0,0
...,...,...,...,...,...,...
141521,Tonanitla,7,Villas de Tonanitla,5,10,4
141522,Tonanitla,7,Villas de Tonanitla,12,12,3
141524,Tonanitla,7,Villas de Tonanitla,10,10,5
141526,Tonanitla,7,Villas de Tonanitla,10,5,5


## Reemplazamos los datos perdidos no aleatorios ('N/D') con NaN y eliminamos las filas que los contienen

In [6]:
# Turn the 'N/D' marker into NaN in P12YM_SOLT, then drop every row that
# contains a NaN.
# The column is rebuilt with assign() instead of Series.replace(inplace=True):
# an in-place replace on a selected column is chained assignment, which pandas
# warns about and which does not modify the frame under Copy-on-Write.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df_edomex_solt_mun = df_edomex_solt_mun.assign(
    P12YM_SOLT=df_edomex_solt_mun['P12YM_SOLT'].replace('N/D', np.nan)
).dropna()
df_edomex_solt_mun

Unnamed: 0,NOM_MUN,LOC,NOM_LOC,P_12YMAS_F,P_12YMAS_M,P12YM_SOLT
0,Total de la entidad México,0,Total de la entidad,7137483,6605364,4799091
1,Acambay de Ruíz Castañeda,0,Total del municipio,27342,24517,18101
2,Acambay de Ruíz Castañeda,1,Total de la localidad urbana,2568,2182,1780
3,Acambay de Ruíz Castañeda,1,Total AGEB urbana,1488,1207,1029
4,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,0,0,0
...,...,...,...,...,...,...
141521,Tonanitla,7,Villas de Tonanitla,5,10,4
141522,Tonanitla,7,Villas de Tonanitla,12,12,3
141524,Tonanitla,7,Villas de Tonanitla,10,10,5
141526,Tonanitla,7,Villas de Tonanitla,10,5,5


### Hacemos cast para convertir a datos de tipo numérico la columna de personas solteras

In [7]:
# Cast the single-population column to integers; every other column keeps its dtype.
df_edomex_solt_mun = df_edomex_solt_mun.astype({'P12YM_SOLT': int})
df_edomex_solt_mun

Unnamed: 0,NOM_MUN,LOC,NOM_LOC,P_12YMAS_F,P_12YMAS_M,P12YM_SOLT
0,Total de la entidad México,0,Total de la entidad,7137483,6605364,4799091
1,Acambay de Ruíz Castañeda,0,Total del municipio,27342,24517,18101
2,Acambay de Ruíz Castañeda,1,Total de la localidad urbana,2568,2182,1780
3,Acambay de Ruíz Castañeda,1,Total AGEB urbana,1488,1207,1029
4,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,0,0,0
...,...,...,...,...,...,...
141521,Tonanitla,7,Villas de Tonanitla,5,10,4
141522,Tonanitla,7,Villas de Tonanitla,12,12,3
141524,Tonanitla,7,Villas de Tonanitla,10,10,5
141526,Tonanitla,7,Villas de Tonanitla,10,5,5


## Cargamos el dataset que contiene las columnas de latitud y longitud

In [8]:
# Load the ITER locality table, which carries the LATITUD / LONGITUD columns.
ruta_iter = 'data/iter_15_cpv2020/conjunto_de_datos/conjunto_de_datos_iter_15CSV20.csv'
df_latlon = pd.read_csv(ruta_iter)
df_latlon

Unnamed: 0,ENTIDAD,NOM_ENT,MUN,NOM_MUN,LOC,NOM_LOC,LONGITUD,LATITUD,ALTITUD,POBTOT,...,VPH_CEL,VPH_INTER,VPH_STVP,VPH_SPMVPI,VPH_CVJ,VPH_SINRTV,VPH_SINLTC,VPH_SINCINT,VPH_SINTIC,TAMLOC
0,15,México,0,Total de la entidad México,0,Total de la Entidad,,,,16992418,...,4049493,2561525,1559564,822048,572663,133777,329132,1769551,53857,*
1,15,México,0,Total de la entidad México,9998,Localidades de una vivienda,,,,6011,...,150,33,50,8,4,39,41,148,23,*
2,15,México,0,Total de la entidad México,9999,Localidades de dos viviendas,,,,765,...,153,38,52,7,6,36,51,156,23,*
3,15,México,1,Acambay de Ruíz Castañeda,0,Total del Municipio,,,,67872,...,13388,2973,2140,393,257,1352,3427,13523,734,*
4,15,México,1,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,"99°50'38.515"" W","19°57'22.423"" N",2564.0,5988,...,1441,854,677,226,118,55,75,573,10,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5131,15,México,125,Tonanitla,4,Colonia las Chinampas,"99°03'16.321"" W","19°41'45.927"" N",2237.0,520,...,128,40,27,14,18,0,11,93,0,3
5132,15,México,125,Tonanitla,5,Valle Verde,"99°02'50.531"" W","19°41'37.932"" N",2239.0,299,...,56,15,9,3,5,2,16,48,1,2
5133,15,México,125,Tonanitla,6,San Bartolo,"99°03'34.313"" W","19°39'54.355"" N",2254.0,9,...,*,*,*,*,*,*,*,*,*,1
5134,15,México,125,Tonanitla,7,Villas de Tonanitla,"99°04'09.823"" W","19°40'41.797"" N",2241.0,2601,...,827,677,422,402,352,3,16,95,0,5


### Nos quedamos solo con las columnas que necesitamos del lado de iter (y descartamos la fila 0, el total de la entidad)

In [9]:
# From index label 1 onward (this leaves out row 0), keep only the locality
# keys used for the join plus the two coordinate columns.
columnas_iter = ['NOM_MUN', 'LOC', 'NOM_LOC', 'LATITUD', 'LONGITUD']
df_latlon = df_latlon.loc[1:, columnas_iter]
df_latlon

Unnamed: 0,NOM_MUN,LOC,NOM_LOC,LATITUD,LONGITUD
1,Total de la entidad México,9998,Localidades de una vivienda,,
2,Total de la entidad México,9999,Localidades de dos viviendas,,
3,Acambay de Ruíz Castañeda,0,Total del Municipio,,
4,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,"19°57'22.423"" N","99°50'38.515"" W"
5,Acambay de Ruíz Castañeda,2,Agostadero (San José Agostadero),"20°02'25.109"" N","100°00'09.008"" W"
...,...,...,...,...,...
5131,Tonanitla,4,Colonia las Chinampas,"19°41'45.927"" N","99°03'16.321"" W"
5132,Tonanitla,5,Valle Verde,"19°41'37.932"" N","99°02'50.531"" W"
5133,Tonanitla,6,San Bartolo,"19°39'54.355"" N","99°03'34.313"" W"
5134,Tonanitla,7,Villas de Tonanitla,"19°40'41.797"" N","99°04'09.823"" W"


## Ya podemos hacer Join con nuestras tablas :D (mi primera parte favorita)


In [17]:
# Inner join: every census row picks up the coordinates of the locality that
# shares its municipality name, locality code and locality name.
claves_localidad = ['NOM_MUN', 'LOC', 'NOM_LOC']
df_merged = df_edomex_solt_mun.merge(df_latlon, how='inner', on=claves_localidad)
df_merged

Unnamed: 0,NOM_MUN,LOC,NOM_LOC,P_12YMAS_F,P_12YMAS_M,P12YM_SOLT,LATITUD,LONGITUD
0,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,0,0,0,"19°57'22.423"" N","99°50'38.515"" W"
1,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,8,4,8,"19°57'22.423"" N","99°50'38.515"" W"
2,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,12,6,8,"19°57'22.423"" N","99°50'38.515"" W"
3,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,15,7,10,"19°57'22.423"" N","99°50'38.515"" W"
4,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,9,8,3,"19°57'22.423"" N","99°50'38.515"" W"
...,...,...,...,...,...,...,...,...
128108,Tonanitla,7,Villas de Tonanitla,5,10,4,"19°40'41.797"" N","99°04'09.823"" W"
128109,Tonanitla,7,Villas de Tonanitla,12,12,3,"19°40'41.797"" N","99°04'09.823"" W"
128110,Tonanitla,7,Villas de Tonanitla,10,10,5,"19°40'41.797"" N","99°04'09.823"" W"
128111,Tonanitla,7,Villas de Tonanitla,10,5,5,"19°40'41.797"" N","99°04'09.823"" W"


### Finalmente renombramos las columnas de latitud y longitud a LATITUDE y LONGITUDE

Observa que tenemos duplicados en las coordenadas: todas las filas de una misma localidad heredan las coordenadas de esa localidad.

In [18]:
# Rename only the two coordinate columns, by name.
# An explicit mapping replaces the positional `df_merged.columns = (...)`
# assignment, which would silently mislabel columns if their order changed
# and raise if their number changed. rename() ignores keys that are absent,
# so re-running this cell is harmless.
df_merged = df_merged.rename(columns={'LATITUD': 'LATITUDE', 'LONGITUD': 'LONGITUDE'})
df_merged


Unnamed: 0,NOM_MUN,LOC,NOM_LOC,P_12YMAS_F,P_12YMAS_M,P12YM_SOLT,LATITUDE,LONGITUDE
0,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,0,0,0,"19°57'22.423"" N","99°50'38.515"" W"
1,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,8,4,8,"19°57'22.423"" N","99°50'38.515"" W"
2,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,12,6,8,"19°57'22.423"" N","99°50'38.515"" W"
3,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,15,7,10,"19°57'22.423"" N","99°50'38.515"" W"
4,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,9,8,3,"19°57'22.423"" N","99°50'38.515"" W"
...,...,...,...,...,...,...,...,...
128108,Tonanitla,7,Villas de Tonanitla,5,10,4,"19°40'41.797"" N","99°04'09.823"" W"
128109,Tonanitla,7,Villas de Tonanitla,12,12,3,"19°40'41.797"" N","99°04'09.823"" W"
128110,Tonanitla,7,Villas de Tonanitla,10,10,5,"19°40'41.797"" N","99°04'09.823"" W"
128111,Tonanitla,7,Villas de Tonanitla,10,5,5,"19°40'41.797"" N","99°04'09.823"" W"


In [None]:
# TODO: "Agrupar" (group) -- placeholder left in a code cell. As a bare name it
# raised NameError under Run All. Presumably the rows were meant to be
# aggregated (e.g. per locality) at this point; confirm the intent.

In [20]:
def _dms_a_decimal(valor):
    """Convert a DMS coordinate string with dms2dec; return any non-string value unchanged.

    The pass-through makes this cell safe to re-run: once the columns already
    hold the converted numbers they are left as they are instead of being
    handed to dms2dec again.
    """
    return dms2dec(valor) if isinstance(valor, str) else valor


# Convert both coordinate columns from DMS text to decimal values.
for columna in ('LATITUDE', 'LONGITUDE'):
    df_merged[columna] = df_merged[columna].apply(_dms_a_decimal)
df_merged

Unnamed: 0,NOM_MUN,LOC,NOM_LOC,P_12YMAS_F,P_12YMAS_M,P12YM_SOLT,LATITUDE,LONGITUDE
0,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,0,0,0,19.956229,-99.844032
1,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,8,4,8,19.956229,-99.844032
2,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,12,6,8,19.956229,-99.844032
3,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,15,7,10,19.956229,-99.844032
4,Acambay de Ruíz Castañeda,1,Villa de Acambay de Ruíz Castañeda,9,8,3,19.956229,-99.844032
...,...,...,...,...,...,...,...,...
128108,Tonanitla,7,Villas de Tonanitla,5,10,4,19.678277,-99.069395
128109,Tonanitla,7,Villas de Tonanitla,12,12,3,19.678277,-99.069395
128110,Tonanitla,7,Villas de Tonanitla,10,10,5,19.678277,-99.069395
128111,Tonanitla,7,Villas de Tonanitla,10,5,5,19.678277,-99.069395


## Generamos un mapa y le agregamos datos

On the menu we have:
Layers — Defines how the variables are encoded to the map
Filters — For selecting smaller sets of data
Interactions — Defines interactions such as Tooltips, search boxes, and others
Basemap — Defines the style of the world map and other elements like labels, roads, styles

In [15]:
# Create the Kepler.gl widget and show it as the cell output; data is attached in a later cell.
ALTURA_MAPA = 400
kepler_map = keplergl.KeplerGl(height=ALTURA_MAPA)
kepler_map

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl()

In [21]:
# Register the merged frame as a dataset named "solteras" on the map widget.
kepler_map.add_data(name="solteras", data=df_merged)