In [4]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import json
import psycopg2
from sodapy import Socrata
from transform_dag import transformations_api_data

In [2]:
def read_db_water():
    """Read the whole ``water_table`` from PostgreSQL into a DataFrame.

    Connection settings are loaded from ``db_config.json`` in the current
    working directory. The keys ``user``, ``password``, ``host`` and
    ``dbname`` are required; ``port`` is optional and defaults to 5432.

    Returns:
        pandas.DataFrame: the result of ``SELECT * FROM water_table``.

    Raises:
        FileNotFoundError: if ``db_config.json`` does not exist.
        KeyError: if a required key is missing from the configuration.
    """
    # Local import so this cell does not depend on the notebook's import cell
    # having been extended.
    from urllib.parse import quote

    with open('db_config.json') as file:
        db_config = json.load(file)

    # Percent-encode the credentials: characters such as '@', '/' or ':' in a
    # user name or password would otherwise be parsed as URL delimiters and
    # produce a wrong host or a malformed connection string.
    user = quote(str(db_config["user"]), safe="")
    password = quote(str(db_config["password"]), safe="")
    port = db_config.get("port", 5432)

    engine = create_engine(
        f'postgresql+psycopg2://{user}:{password}@{db_config["host"]}:{port}/{db_config["dbname"]}'
    )

    try:
        water = pd.read_sql('SELECT * FROM water_table', engine)
    finally:
        # Release pooled connections even when the query fails; a fresh
        # engine is created on every call, so nothing else reuses this one.
        engine.dispose()

    return water

In [3]:
from transform_dag import apply_transformations

def transform_water():
    """Load the water table, transform it, and print a preview.

    The data comes from ``read_db_water`` and is passed through
    ``apply_transformations``; the first rows of the result are printed.
    Nothing is returned.
    """
    transformed = apply_transformations(read_db_water())
    preview = transformed.head()
    print(preview)

# Call the main function to process the data
transform_water()



2024-05-03 12:59:25,578 - INFO - Starting transformations on water data.
2024-05-03 12:59:25,815 - INFO - Standardized place names.
2024-05-03 12:59:25,846 - INFO - Scaled numerical columns.
2024-05-03 12:59:25,908 - INFO - Filtered top influential parameters.
2024-05-03 12:59:26,010 - INFO - Classified IRCA values into categories.
2024-05-03 12:59:28,655 - INFO - Categorized treatment data.
2024-05-03 12:59:32,809 - INFO - Critical Proportion.
2024-05-03 12:59:32,860 - INFO - Dropped unnecessary columns.
2024-05-03 12:59:32,861 - INFO - Column names standardized
2024-05-03 12:59:32,861 - INFO - Standardized column names.
2024-05-03 12:59:32,862 - INFO - All transformations applied successfully.


          año nombredepartamento  div_dpto nombremunicipio  divi_muni  \
0  2010-01-01            Bolívar        13        El Guamo      13248   
1  2010-01-01            Bolívar        13        El Guamo      13248   
2  2010-01-01            Bolívar        13        El Guamo      13248   
3  2010-01-01            Bolívar        13        El Guamo      13248   
4  2010-01-01            Bolívar        13        El Guamo      13248   

   ircaminimo  ircamaximo  ircapromedio nombreparametroanalisis2  \
0         0.0       100.0         37.32        Alcanilidad Total   
1         0.0       100.0         37.32                 Aluminio   
2         0.0       100.0         37.32                 Arsénico   
3         0.0       100.0         37.32                   Cadmio   
4         0.0       100.0         37.32                   Calcio   

   numeroparametrospromedio  is_top_15   rango_irca tratamientocategoría  \
0                         2       True  Riesgo alto  Tratamiento parcial   


In [6]:
def extract_api(endpoint, limit=2000):
    """Download records from a datos.gov.co Socrata dataset as a DataFrame.

    Args:
        endpoint: Socrata dataset identifier passed to ``client.get``
            (for example ``"tcwu-r53g"``).
        limit: maximum number of records to request. Defaults to 2000,
            the value that was previously hard-coded.

    Returns:
        pandas.DataFrame: the fetched records, or an empty DataFrame if any
        error occurs (the error is printed, not raised).
    """
    try:
        # Initialise the Socrata client without an application token
        client = Socrata("www.datos.gov.co", None)
        try:
            # Fetch the data from the requested endpoint
            results = client.get(endpoint, limit=limit)
        finally:
            # Always release the client's HTTP session, even when the
            # request fails.
            client.close()

        # Convert the results into a DataFrame
        return pd.DataFrame.from_records(results)
    except Exception as e:
        print(f"Se produjo un error: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on error


In [20]:

def transform_api():
    """Fetch the API dataset, transform it, and print a preview.

    Downloads the ``"tcwu-r53g"`` dataset through ``extract_api``, passes it
    to ``transformations_api_data`` and prints the first rows of the result
    with no limit on the number of displayed columns or rows. Nothing is
    returned.
    """
    api = extract_api("tcwu-r53g")

    api = transformations_api_data(api)

    # Lift the display limits only while printing the preview. Setting them
    # with pd.set_option would change the options for the whole kernel, and
    # an unlimited display.max_rows makes every later cell dump full frames.
    with pd.option_context('display.max_columns', None, 'display.max_rows', None):
        print(api.head())

# Run the API extraction and transformation and print a preview of the result
transform_api()

2024-05-03 18:20:16,379 - INFO - Starting transformations on API data.
2024-05-03 18:20:16,385 - INFO - Dates converted successfully.
2024-05-03 18:20:16,394 - INFO - Text columns normalized successfully.
2024-05-03 18:20:16,396 - INFO - Number of municipalities computed successfully.
2024-05-03 18:20:16,400 - INFO - Regions mapped successfully.
2024-05-03 18:20:16,402 - INFO - Project financing calculated successfully.
2024-05-03 18:20:16,405 - INFO - Project duration calculated successfully.
2024-05-03 18:20:16,408 - INFO - Unnecessary columns dropped successfully.
2024-05-03 18:20:16,411 - INFO - All transformations applied successfully.


  c_digo_divipola_departamento departamento c_digo_divipola_municipio  \
0                           18      CAQUETA       18029, 18205, 18610   
1                           15       BOYACA                     15693   
2                           17       CALDAS                     17050   
3                           17       CALDAS                     17446   
4                           05    ANTIOQUIA                      5250   

                                           municipio  \
0  albania(caq), curillo(caq), san jose de fragua...   
1                         santa rosa de viterbo(boy)   
2                                      aranzazu(cal)   
3                                     marulanda(cal)   
4                                      el bagre(ant)   

                                           indicador  \
0       nueva población beneficiada acueducto urbano   
1       nueva población beneficiada acueducto urbano   
2  nueva población beneficiada alcantarillado urbano   
