In [2]:
# python
from pathlib import Path
import json

# database
import sqlite3

# data analysis
import pandas as pd
import geopandas as gpd

### Paths

In [3]:
current_dir = Path.cwd()
parent_dir = current_dir.parent

### Database connection

In [4]:
conn = sqlite3.connect(parent_dir / 'data' / 'caf_accessibility.db')

def get_dataframe_from_sqlite_db(table_name: str, conn: sqlite3.Connection=conn, geo_type: str='polygon'):
    # columns = utils.concat_list()   
    columns = '*'
    df = pd.read_sql(f'SELECT {columns} FROM {table_name}', conn)
    if geo_type == 'polygon':
        geo = get_geo_from_sqlite_db(table_name, conn)
    elif geo_type == 'point':
        geo = get_geo_from_sqlite_db(table_name, conn)
        features = geo['features']
        lat = list()
        lon = list()
        for f in features:
            lat.append(f['geometry']['coordinates'][1])
            lon.append(f['geometry']['coordinates'][0])
        geo = pd.DataFrame(data={'lat': lat, 'lon': lon})
    return df, geo

def get_geo_from_sqlite_db(table_name: str, conn: sqlite3.Connection=conn):
    c = conn.cursor()
    c.execute(f'SELECT geometry FROM Geos WHERE table_ = "{table_name}"')
    result = c.fetchone()[0]
    return json.loads(result)

### Accessibility dataset

In [5]:
accessibility_df, accessibility_geo = get_dataframe_from_sqlite_db(
    table_name='Accessibility', conn=conn)
accessibility_geo = gpd.GeoDataFrame.from_features(accessibility_geo['features'])
accessibility_df['geometry'] = accessibility_geo.geometry
accessibility_df = gpd.GeoDataFrame(accessibility_df, geometry='geometry')

In [6]:
censo_bogota_pers = pd.read_csv(
    parent_dir / 'data' / '11Bogota'/'11_Bogota_CSV'/'CNPV2018_5PER_A2_11.CSV'
)

In [130]:
censo_bogota_pers = pd.read_csv(
    parent_dir / 'data' / '11Bogota'/'11_Bogota_CSV'/'CNPV2018_5PER_A2_11.CSV'
)

marco_geo = pd.read_csv(
    parent_dir / 'data' / '11Bogota'/'11_Bogota_CSV'/'CNPV2018_MGN_A2_11.CSV'
)
censo_bogota_pers = censo_bogota_pers.merge(marco_geo[['COD_ENCUESTAS', 'COD_DANE_ANM']], how='left', on='COD_ENCUESTAS')


## Personas vulnerables

In [132]:
censo_bogota_pers = censo_bogota_pers[['COD_DANE_ANM', 'CONDICION_FISICA', 'P_ALFABETA', 'PA1_GRP_ETNIC', 'P_SEXO']]

## Marco geoestadistico

In [71]:
mge = gpd.read_file(parent_dir / 'data' / 'MGN2021_URB_MANZANA' / 'MGN_URB_MANZANA.shp')
mge_bta = mge[mge.COD_DPTO=='11'].copy()
mge_bta['geometry'] = mge_bta.geometry.centroid
del(mge)


  mge_bta['geometry'] = mge_bta.geometry.centroid


## Mix MGE y Accesibilidad

In [145]:
temp = gpd.sjoin(mge_bta[['COD_DANE', 'geometry']], accessibility_df, how='left', op='within').reset_index()
temp = temp[['COD_DANE', 'hex']].copy()
temp = temp.dropna()

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4686
Right CRS: None

  temp = gpd.sjoin(mge_bta[['COD_DANE', 'geometry']], accessibility_df, how='left', op='within').reset_index()


In [146]:
temp.head()

Unnamed: 0,COD_DANE,hex
0,1100110000000011010101,8966e090c93ffff
1,1100110000000011010102,8966e090c97ffff
2,1100110000000011010103,8966e090c97ffff
3,1100110000000011010104,8966e090c83ffff
4,1100110000000011010105,8966e090c97ffff


In [152]:
censo_bogota_pers = censo_bogota_pers.merge(temp, how='left', left_on='COD_DANE_ANM', right_on='COD_DANE').reset_index(drop=True)

In [154]:
censo_bogota_pers.head()

Unnamed: 0,COD_DANE_ANM,CONDICION_FISICA,P_ALFABETA,PA1_GRP_ETNIC,P_SEXO,COD_DANE,hex
0,1100110000000051110309,2.0,1.0,6,1,1100110000000051110309,8966e092027ffff
1,1100110000000051110309,2.0,1.0,6,2,1100110000000051110309,8966e092027ffff
2,1100110000000051110309,2.0,1.0,6,2,1100110000000051110309,8966e092027ffff
3,1100110000000051110309,2.0,1.0,6,1,1100110000000051110309,8966e092027ffff
4,1100110000000051110111,2.0,1.0,6,2,1100110000000051110111,8966e092037ffff


In [156]:
def f(x):
    if x==1:
        return 1
    return 0
censo_bogota_pers['CONDICION_FISICA_1'] = censo_bogota_pers['CONDICION_FISICA'].apply(f)

In [157]:
censo_bogota_pers.head()

Unnamed: 0,COD_DANE_ANM,CONDICION_FISICA,P_ALFABETA,PA1_GRP_ETNIC,P_SEXO,COD_DANE,hex,CONDICION_FISICA_1
0,1100110000000051110309,2.0,1.0,6,1,1100110000000051110309,8966e092027ffff,0
1,1100110000000051110309,2.0,1.0,6,2,1100110000000051110309,8966e092027ffff,0
2,1100110000000051110309,2.0,1.0,6,2,1100110000000051110309,8966e092027ffff,0
3,1100110000000051110309,2.0,1.0,6,1,1100110000000051110309,8966e092027ffff,0
4,1100110000000051110111,2.0,1.0,6,2,1100110000000051110111,8966e092037ffff,0


In [160]:
censo_bogota_pers[['hex', 'CONDICION_FISICA_1']].groupby('hex').sum().reset_index()

Unnamed: 0,hex,CONDICION_FISICA_1
0,8966e082003ffff,425
1,8966e082007ffff,651
2,8966e08200bffff,113
3,8966e08200fffff,413
4,8966e082013ffff,648
...,...,...
3157,8966e4669a3ffff,1
3158,8966e4669a7ffff,2
3159,8966e4669b3ffff,12
3160,8966e4669b7ffff,0


In [161]:
def f(x):
    if x==2:
        return 1
    return 0
censo_bogota_pers['P_ALFABETA_1'] = censo_bogota_pers['P_ALFABETA'].apply(f)

In [170]:
censo_bogota_pers[['hex', 'P_ALFABETA_1']].groupby('hex').sum().reset_index()

Unnamed: 0,hex,P_ALFABETA_1
0,8966e082003ffff,98
1,8966e082007ffff,163
2,8966e08200bffff,29
3,8966e08200fffff,110
4,8966e082013ffff,126
...,...,...
3157,8966e4669a3ffff,0
3158,8966e4669a7ffff,2
3159,8966e4669b3ffff,13
3160,8966e4669b7ffff,0


In [8]:
censo_bogota_pers[censo_bogota_pers.P_SEXO==2].P_PARENTESCOR.value_counts()



3.0    1185486
1.0    1052926
2.0    1006754
4.0     427478
5.0      66779
Name: P_PARENTESCOR, dtype: int64