In [None]:
import pandas as pd
from deltalake import write_deltalake, DeltaTable
from datetime import datetime
import requests
import logging
import os

# Supply your OpenWeatherMap API key through the OPENWEATHER_API_KEY environment
# variable instead of editing it into the notebook, so the key is never committed.
# When the variable is unset the literal "{your_api_key}" placeholder is kept,
# which leaves the URL exactly as it was before.
api_key = os.environ.get("OPENWEATHER_API_KEY", "{your_api_key}")
url = f"https://api.openweathermap.org/data/2.5/weather?lat=-31.135&lon=-64.1811&appid={api_key}&units=metric"
# Local directories holding the Bronze (raw) and Silver (typed) Delta tables.
bronze_path = "../datalake/bronze/weather_data"
silver_path = "../datalake/silver/weather_data"

### **EXTRACT - Test**

In [None]:
# EXTRACT
def extract(url):
    """Fetch the weather payload from the API and return it as a dict.

    The request is made with a 10-second timeout. On success, an extra
    ``data['main']['datetime']`` entry is added holding the local time of the
    extraction truncated to the hour (format ``"%Y-%m-%d %H"``).

    Args:
        url: Full request URL, including query parameters and API key.

    Returns:
        The parsed JSON payload (dict) with the added ``main.datetime`` entry,
        or ``None`` when the request fails or the response does not have the
        expected shape. The failure is logged, not raised.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        # Hour-granularity stamp of when the extraction ran (local clock).
        data['main']['datetime'] = datetime.now().strftime("%Y-%m-%d %H")
    except requests.exceptions.RequestException as e:
        logging.error(f"Error en la extracción: {e}")
        return None
    except (KeyError, TypeError, ValueError) as e:
        # A body that is not valid JSON (ValueError) or that lacks a 'main'
        # object (KeyError/TypeError) is handled like any other failed
        # extraction instead of escaping as an unhandled exception.
        logging.error(f"Respuesta inesperada de la API: {e!r}")
        return None
    logging.info("Datos extraídos correctamente")
    return data
    
raw_data = extract(url) # Removed in the DAG version
raw_data # Removed in the DAG version

{'coord': {'lon': -64.1811, 'lat': -31.135},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01n'}],
 'base': 'stations',
 'main': {'temp': 14.39,
  'feels_like': 13.9,
  'temp_min': 14.39,
  'temp_max': 14.39,
  'pressure': 1022,
  'humidity': 77,
  'sea_level': 1022,
  'grnd_level': 950,
  'datetime': '2025-03-10 00'},
 'visibility': 10000,
 'wind': {'speed': 2.67, 'deg': 346, 'gust': 3.26},
 'clouds': {'all': 2},
 'dt': 1741575952,
 'sys': {'type': 1,
  'id': 8226,
  'country': 'AR',
  'sunrise': 1741601613,
  'sunset': 1741646432},
 'timezone': -10800,
 'id': 3862525,
 'name': 'Carreta Quebrada',
 'cod': 200}

### **LOAD - Test**

In [None]:
# LOAD
def load(raw_data, bronze_path):
    """Load the extracted payload into the Bronze Delta table.

    The payload is flattened with ``pd.json_normalize`` and appended to the
    Delta table at ``bronze_path``, partitioned by ``main.datetime``. When the
    path already exists, the record is appended only if its ``main.datetime``
    value is not already present in the table.

    Args:
        raw_data: Payload returned by ``extract`` (dict), or ``None``.
        bronze_path: Directory of the Bronze Delta table.

    Returns:
        The flattened one-row DataFrame (whether or not it was written), or
        ``None`` when ``raw_data`` is ``None``.
    """
    if raw_data is None:
        logging.error("No hay datos para cargar en Bronze.")
        return None

    # Keep the flattened frame in its own name instead of rebinding the argument.
    bronze_df = pd.json_normalize(raw_data)

    if not os.path.exists(bronze_path):
        # First run: the write creates the table.
        write_deltalake(bronze_path, bronze_df, mode="append", partition_by=["main.datetime"])
        logging.info("Bronze inicializado con los datos actuales.")
        return bronze_df # Removed in the DAG version

    new_dt = bronze_df["main.datetime"].max()
    existing_data = DeltaTable(bronze_path).to_pandas()
    if new_dt in existing_data["main.datetime"].values:
        logging.warning("Ya existen datos agregados en Bronze para esta fecha y hora.")
    else:
        write_deltalake(bronze_path, bronze_df, mode="append", partition_by=["main.datetime"])
        # Distinct message for appends, so the log does not claim the table
        # was (re)initialised on every new hour.
        logging.info("Datos cargados en Bronze.")
    return bronze_df # Removed in the DAG version


bronze_data = load(raw_data, bronze_path) # Removed in the DAG version
bronze_data # Removed in the DAG version


Unnamed: 0,weather,base,visibility,dt,timezone,id,name,cod,coord.lon,coord.lat,...,main.datetime,wind.speed,wind.deg,wind.gust,clouds.all,sys.type,sys.id,sys.country,sys.sunrise,sys.sunset
0,"[{'id': 800, 'main': 'Clear', 'description': '...",stations,10000,1741575952,-10800,3862525,Carreta Quebrada,200,-64.1811,-31.135,...,2025-03-10 00,2.67,346,3.26,2,1,8226,AR,1741601613,1741646432


### **TRANSFORM - Test**

In [None]:
# TRANSFORM
def transform(bronze_path, silver_path):
    """Transform the Bronze table into typed rows and load them into Silver.

    Reads the whole Bronze Delta table, keeps the ``main.*`` columns, strips
    the ``main.`` prefix, casts the columns to compact dtypes and appends the
    result to the Silver Delta table partitioned by ``datetime``. Only rows
    whose ``datetime`` is not yet present in Silver are appended.

    Note: ``pressure``, ``sea_level`` and ``grnd_level`` are stored as
    ``int16``. They were previously cast to ``int8``, which cannot hold values
    above 127 and silently wrapped (1022 became -2, 950 became -74). A Silver
    table written with the old ``int8`` schema contains wrapped values and
    will likely need to be rebuilt before new rows can be appended.

    Args:
        bronze_path: Directory of the Bronze Delta table.
        silver_path: Directory of the Silver Delta table.

    Returns:
        The full transformed DataFrame (all Bronze rows), or ``None`` when
        ``bronze_path`` does not exist.
    """
    if not os.path.exists(bronze_path):
        logging.error("No se encontró la capa Bronze.")
        return None

    df = DeltaTable(bronze_path).to_pandas()
    df = df.filter(like='main', axis=1)
    df.columns = df.columns.str.replace('main.', '', regex=False)
    # NOTE(review): temperatures are truncated to whole degrees by the int8
    # cast while temp_max is left untouched (float) - confirm this is intended.
    conversion_mapping = {
        "temp": 'int8', "feels_like": 'int8', "temp_min": 'int8',
        "pressure": 'int16', "humidity": 'int8', "sea_level": 'int16',
        "grnd_level": 'int16', "datetime": 'datetime64[ns]'
    }
    df = df.astype(conversion_mapping)

    if not os.path.exists(silver_path):
        # First run: the write creates the table.
        write_deltalake(silver_path, df, mode="append", partition_by=["datetime"])
        logging.info("Silver inicializado con los datos actuales.")
        return df # Removed in the DAG version

    # Only the key column is needed to detect rows that are already loaded.
    existing_dt = pd.to_datetime(DeltaTable(silver_path).to_pandas(columns=["datetime"])["datetime"])
    if existing_dt.dt.tz is not None:
        # Presumably the table may hand timestamps back timezone-aware; drop
        # the zone so they compare against the naive values in df. TODO confirm.
        existing_dt = existing_dt.dt.tz_localize(None)

    # Append just the missing hours; appending the whole frame would duplicate
    # every hour that Silver already holds.
    new_rows = df[~df["datetime"].isin(existing_dt)]
    if new_rows.empty:
        logging.info("No hay nuevos datos para cargar en Silver.")
    else:
        write_deltalake(silver_path, new_rows, mode="append", partition_by=["datetime"])
        logging.info("Datos cargados en Silver.")
    return df # Removed in the DAG version

data = transform(bronze_path, silver_path) # Removed in the DAG version
data # Removed in the DAG version

Unnamed: 0,temp,feels_like,temp_min,temp_max,pressure,humidity,sea_level,grnd_level,datetime
0,14,13,14,14.39,-2,77,-2,-74,2025-03-10 00:00:00
1,15,15,15,15.39,-2,77,-2,-74,2025-03-09 23:00:00
2,16,16,16,16.39,-1,76,-1,-74,2025-03-09 22:00:00
