In [2]:
import requests
import pandas as pd
from tqdm import tqdm
from datetime import datetime, timedelta
import time

# --- CONFIGURACIÓN ---
city = "Santiago"
country = "CL"
parameters = ["pm25", "pm10"]  # Puedes agregar "no2", "co", etc.
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)
base_url = "https://api.openaq.org/v3/measurements"
sleep_time = 1  # segundos entre requests

def fetch_daily_data(parameter, date_from, date_to):
    all_data = []
    page = 1
    page_size = 10000

    while True:
        params = {
            "city": city,
            "country": country,
            "parameter": parameter,
            "date_from": date_from.isoformat() + "Z",
            "date_to": date_to.isoformat() + "Z",
            "limit": page_size,
            "page": page,
            "sort": "asc"
        }

        response = requests.get(base_url, params=params)
        if response.status_code != 200:
            print(f"❌ Error {response.status_code}: {response.text}")
            break

        data = response.json()
        results = data.get("results", [])
        if not results:
            break

        all_data.extend(results)

        meta = data.get("meta", {})
        total_pages = meta.get("found", 0) // page_size + 1

        if page >= total_pages:
            break

        page += 1
        time.sleep(sleep_time)

    return pd.DataFrame(all_data)

def download_parameter(parameter):
    print(f"\n📦 Descargando datos de '{parameter}'...")
    current_date = start_date
    all_df = []

    while current_date <= end_date:
        next_day = current_date + timedelta(days=1)
        df = fetch_daily_data(parameter, current_date, next_day)
        if not df.empty:
            all_df.append(df)
        current_date = next_day

    return pd.concat(all_df, ignore_index=True) if all_df else pd.DataFrame()

def process_df(df, parameter):
    if df.empty:
        return None

    df["datetime"] = pd.to_datetime(df["date"]["utc"])
    df = df[["datetime", "value", "location"]]
    df = df.rename(columns={"value": parameter})
    df = df.set_index("datetime").sort_index()
    df = df.groupby("datetime")[parameter].mean().to_frame()
    return df

# --- DESCARGA Y PROCESAMIENTO ---
data_frames = []
for param in parameters:
    raw_df = download_parameter(param)
    proc_df = process_df(raw_df, param)
    if proc_df is not None:
        data_frames.append(proc_df)

# --- UNIÓN Y SALIDA ---
if data_frames:
    df_all = pd.concat(data_frames, axis=1)
    df_all = df_all.resample("1H").mean().interpolate()
    df_all = df_all.reset_index()
    print("\n✅ Datos procesados:")
    print(df_all.head())

    output_file = f"calidad_aire_santiago_{start_date.year}.csv"
    df_all.to_csv(output_file, index=False)
    print(f"\n📁 Guardado como: {output_file}")
else:
    print("⚠️ No se descargaron datos.")



📦 Descargando datos de 'pm25'...
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be provided in the X-API-Key header."}
❌ Error 401: {"message": "Unauthorized. A valid API key must be pr

KeyboardInterrupt: 