In [2]:
# Python
from pathlib import Path
import json

# Database  
import sqlite3

# Data analysis
import pandas as pd

# Geospatial
import geopandas as gpd
import shapely.wkb
from shapely.wkb import loads

In [3]:
# Paths are resolved relative to the kernel's working directory; the data
# files used by the cells below are expected under <parent of cwd>/data.
current_dir = Path.cwd()
parent_dir = current_dir.parent

In [4]:
# Open the SQLite database file (sqlite3 creates it if it does not exist yet).
# This connection is the one passed to the save_* calls further down.
conn = sqlite3.connect(parent_dir / 'data' / 'caf_accessibility.db')

In [91]:
# The accessibility data comes as two parquet files: one read with plain pandas
# and one read with GeoPandas (only its 'geometry' column is used later on).
bogota_cuenca_df_parquet = pd.read_parquet(parent_dir.joinpath('data', 'bogota_cuenca_v2.parquet'))
bogota_cuenca_gdf_geo = gpd.read_parquet(parent_dir.joinpath('data', 'bogota_cuenca_geo_v2.parquet'))

In [96]:
# Attach the geometry column to the attribute table column-wise.
# pd.concat(axis=1) aligns rows on the index, so this assumes both parquet
# files share the same index / row order — TODO confirm.
bogota_cuenca_df = pd.concat([bogota_cuenca_df_parquet, bogota_cuenca_gdf_geo[['geometry']]], axis=1)

In [113]:
# Hospitals layer (stored later as table 'Hospitals').
hospitales_gdf = gpd.read_parquet(parent_dir.joinpath('data', 'Bogota_SaluHosp.parquet'))

In [117]:
# Green-areas layer (stored later as table 'Green_Areas').
espacios_verdes_gdf = gpd.read_parquet(parent_dir.joinpath('data', 'Bogota_EspaVerd.parquet'))

In [5]:
# Primary health-care layer, read from GeoJSON (stored later as 'Primary_Health_Care').
atencion_primaria_gdf = gpd.read_file(parent_dir.joinpath('data', 'Bogotá_SaluAtenPrim.geojson'))


In [23]:
# Primary-education layers: private (Priv) and public (Publ) GeoJSON files.
Bogota_EducPrimPriv_gdf = gpd.read_file(parent_dir.joinpath('data', 'Bogotá_EducPrimPriv.geojson'))
Bogota_EducPrimPubl_gdf = gpd.read_file(parent_dir.joinpath('data', 'Bogotá_EducPrimPubl.geojson'))

In [24]:
# Check columns, dtypes and non-null counts of the private (Priv) primary-education layer.
Bogota_EducPrimPriv_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1274 entries, 0 to 1273
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   hex_d     1274 non-null   object  
 1   destino   1274 non-null   object  
 2   nombre    1274 non-null   object  
 3   tipo      1274 non-null   object  
 4   oferta    1274 non-null   object  
 5   geometry  1274 non-null   geometry
dtypes: geometry(1), object(5)
memory usage: 59.8+ KB


In [25]:
# Check columns, dtypes and non-null counts of the public (Publ) primary-education layer.
Bogota_EducPrimPubl_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 662 entries, 0 to 661
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   hex_d     662 non-null    object  
 1   destino   662 non-null    object  
 2   nombre    662 non-null    object  
 3   tipo      662 non-null    object  
 4   oferta    662 non-null    object  
 5   geometry  662 non-null    geometry
dtypes: geometry(1), object(5)
memory usage: 31.2+ KB


In [26]:
# Preview the first rows of the public primary-education layer.
Bogota_EducPrimPubl_gdf.head()

Unnamed: 0,hex_d,destino,nombre,tipo,oferta,geometry
0,8866e00023fffff,"4.056654, -74.325743",COLEGIO DISTRITAL RURAL JUAN DE LA CRUZ VARELA...,Educación Primaria,Pública,POINT (-74.32574 4.05665)
1,8866e00061fffff,"4.09417, -74.328902",COLEGIO DISTRITAL RURAL JUAN DE LA CRUZ VARELA...,Educación Primaria,Pública,POINT (-74.32890 4.09417)
2,8866e000adfffff,"4.030324, -74.300455",COLEGIO DISTRITAL RURAL JUAN DE LA CRUZ VARELA...,Educación Primaria,Pública,POINT (-74.30045 4.03032)
3,8866e001bdfffff,"4.008714, -74.343767",COLEGIO DISTRITAL RURAL JUAN DE LA CRUZ VARELA...,Educación Primaria,Pública,POINT (-74.34377 4.00871)
4,8866e001e3fffff,"4.031682, -74.350906",COLEGIO DISTRITAL RURAL JUAN DE LA CRUZ VARELA...,Educación Primaria,Pública,POINT (-74.35091 4.03168)


In [29]:
# Stack public and private primary schools into one layer.
# ignore_index=True gives the result a fresh 0..n-1 index: both inputs carry
# their own RangeIndex starting at 0, so without it index labels repeat and the
# GeoJSON feature ids (taken from the index when the layer is serialized) would
# be duplicated.
Bogota_EducPrim_gdf = pd.concat(
    [Bogota_EducPrimPubl_gdf, Bogota_EducPrimPriv_gdf], axis=0, ignore_index=True
)

### Educación inicial

In [33]:
# Early-education layers: private (Priv) and public (Publ) GeoJSON files.
Bogotá_EducInicPriv_gdf = gpd.read_file(parent_dir.joinpath('data', 'Bogotá_EducInicPriv.geojson'))
Bogotá_EducInicPubl_gdf = gpd.read_file(parent_dir.joinpath('data', 'Bogotá_EducInicPubl.geojson'))

In [34]:
# Check columns, dtypes and non-null counts of the private (Priv) early-education layer.
Bogotá_EducInicPriv_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1076 entries, 0 to 1075
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   hex_d     1076 non-null   object  
 1   destino   1076 non-null   object  
 2   nombre    1076 non-null   object  
 3   tipo      1076 non-null   object  
 4   oferta    1076 non-null   object  
 5   geometry  1076 non-null   geometry
dtypes: geometry(1), object(5)
memory usage: 50.6+ KB


In [35]:
# Check columns, dtypes and non-null counts of the public (Publ) early-education layer.
Bogotá_EducInicPubl_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 657 entries, 0 to 656
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   hex_d     657 non-null    object  
 1   destino   657 non-null    object  
 2   nombre    657 non-null    object  
 3   tipo      657 non-null    object  
 4   oferta    657 non-null    object  
 5   geometry  657 non-null    geometry
dtypes: geometry(1), object(5)
memory usage: 30.9+ KB


In [36]:
# Stack public and private early-education sites into one layer.
# ignore_index=True gives the result a fresh 0..n-1 index: both inputs carry
# their own RangeIndex starting at 0, so without it index labels repeat and the
# GeoJSON feature ids (taken from the index when the layer is serialized) would
# be duplicated.
Bogotá_EducInic_gdf = pd.concat(
    [Bogotá_EducInicPubl_gdf, Bogotá_EducInicPriv_gdf], axis=0, ignore_index=True
)

### Educación secundaria

In [5]:
# Secondary-education layers: private (Priv) and public (Publ) GeoJSON files.
Bogotá_EducSecuPriv_gdf = gpd.read_file(parent_dir.joinpath('data', 'Bogotá_EducSecuPriv.geojson'))
Bogotá_EducSecuPubl_gdf = gpd.read_file(parent_dir.joinpath('data', 'Bogotá_EducSecuPubl.geojson'))

In [6]:
# Check columns, dtypes and non-null counts of the private (Priv) secondary-education layer.
Bogotá_EducSecuPriv_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   hex_d     1258 non-null   object  
 1   destino   1258 non-null   object  
 2   nombre    1258 non-null   object  
 3   tipo      1258 non-null   object  
 4   oferta    1258 non-null   object  
 5   geometry  1258 non-null   geometry
dtypes: geometry(1), object(5)
memory usage: 59.1+ KB


In [7]:
# Check columns, dtypes and non-null counts of the public (Publ) secondary-education layer.
Bogotá_EducSecuPubl_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 491 entries, 0 to 490
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   hex_d     491 non-null    object  
 1   destino   491 non-null    object  
 2   nombre    491 non-null    object  
 3   tipo      491 non-null    object  
 4   oferta    491 non-null    object  
 5   geometry  491 non-null    geometry
dtypes: geometry(1), object(5)
memory usage: 23.1+ KB


In [8]:
# Stack private and public secondary schools into one layer.
# ignore_index=True gives the result a fresh 0..n-1 index: both inputs carry
# their own RangeIndex starting at 0, so without it index labels repeat and the
# GeoJSON feature ids (taken from the index when the layer is serialized) would
# be duplicated.
Bogotá_EducSecu_gdf = pd.concat(
    [Bogotá_EducSecuPriv_gdf, Bogotá_EducSecuPubl_gdf], axis=0, ignore_index=True
)

In [10]:
def save_geo_in_sqlite_db(conn:sqlite3.Connection, table_name, geo_dict_json):
    """Store the serialized geometry of a layer in the ``Geos`` table.

    ``Geos`` holds one row per layer: ``table_`` (unique layer/table name) and
    ``geometry`` (the JSON text passed in). The table is created on first use.

    Saving a ``table_name`` that already has a row overwrites its geometry
    instead of failing on the UNIQUE constraint, so re-running a save cell is
    safe and matches the ``if_exists='replace'`` behaviour used for the
    attribute tables.

    Args:
        conn: Open SQLite connection; the change is committed before returning.
        table_name: Name of the layer the geometry belongs to.
        geo_dict_json: Geometry already serialized to a JSON string.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(
            '''CREATE TABLE IF NOT EXISTS Geos (id INTEGER PRIMARY KEY AUTOINCREMENT, table_ TEXT UNIQUE, geometry TEXT)'''
        )
        # Update-then-insert keeps the existing row id and works on any SQLite
        # version (no dependency on the ON CONFLICT upsert syntax).
        cursor.execute(
            '''UPDATE Geos SET geometry = ? WHERE table_ = ?''',
            (geo_dict_json, table_name),
        )
        if cursor.rowcount == 0:
            cursor.execute(
                '''INSERT INTO Geos (table_, geometry) VALUES (?, ?)''',
                (table_name, geo_dict_json),
            )
        conn.commit()
    finally:
        cursor.close()

def save_dataframe_in_sqlite_db(df:pd.DataFrame, table_name:str, conn:sqlite3.Connection):
    """Persist a frame with a ``geometry`` column into SQLite in two parts.

    The geometry column is serialized to a GeoJSON string (via the
    ``__geo_interface__`` of a geometry-only GeoDataFrame) and handed to
    ``save_geo_in_sqlite_db`` under ``table_name``. All remaining columns are
    then written to a table called ``table_name``, replacing any existing table
    of that name; the frame's index is not stored.

    Args:
        df: Frame to store; must contain a ``geometry`` column.
        table_name: Name of the attribute table and key of the geometry row.
        conn: Open SQLite connection.

    Raises:
        ValueError: If ``df`` has no ``geometry`` column.
    """
    has_geometry = 'geometry' in df.columns
    if not has_geometry:
        raise ValueError('Dataframe must have a geometry column')

    # Geometry goes first: build a geometry-only GeoDataFrame and dump it as JSON.
    geometry_only = gpd.GeoDataFrame(df[['geometry']], geometry='geometry')
    feature_collection = geometry_only[['geometry']].__geo_interface__
    save_geo_in_sqlite_db(conn, table_name, json.dumps(feature_collection))

    # Then the non-spatial attributes, as a regular SQL table.
    attributes = df.drop('geometry', axis=1)
    attributes.to_sql(table_name, conn, if_exists='replace', index=False)



In [None]:
# Accessibility frame: attributes go to table 'Accessibility', geometry to a row in 'Geos'.
save_dataframe_in_sqlite_db(df=bogota_cuenca_df, table_name='Accessibility', conn=conn)

In [116]:
# Hospitals layer: attributes go to table 'Hospitals', geometry to a row in 'Geos'.
save_dataframe_in_sqlite_db(df=hospitales_gdf, table_name='Hospitals', conn=conn)

<class 'geopandas.geodataframe.GeoDataFrame'>


In [118]:
# Green-areas layer: attributes go to table 'Green_Areas', geometry to a row in 'Geos'.
save_dataframe_in_sqlite_db(df=espacios_verdes_gdf, table_name='Green_Areas', conn=conn)

<class 'geopandas.geodataframe.GeoDataFrame'>


In [9]:
# Primary health-care layer: attributes go to table 'Primary_Health_Care', geometry to a row in 'Geos'.
save_dataframe_in_sqlite_db(df=atencion_primaria_gdf, table_name='Primary_Health_Care', conn=conn)

In [30]:
# Combined primary-education layer: attributes go to table 'Primary_Education', geometry to a row in 'Geos'.
save_dataframe_in_sqlite_db(df=Bogota_EducPrim_gdf, table_name='Primary_Education', conn=conn)


In [38]:
# Combined early-education layer: attributes go to table 'Early_Education', geometry to a row in 'Geos'.
save_dataframe_in_sqlite_db(df=Bogotá_EducInic_gdf, table_name='Early_Education', conn=conn)


In [11]:
# Combined secondary-education layer: attributes go to table 'Secondary_Education', geometry to a row in 'Geos'.
save_dataframe_in_sqlite_db(df=Bogotá_EducSecu_gdf, table_name='Secondary_Education', conn=conn)