In [None]:
import requests
import pandas as pd
from tqdm import tqdm
import time

# --- CONFIGURATION ---
# City name sent as the "city" query parameter.
city = "Santiago"

# Pollutants to download, one request series per entry.
# Others can be added: "no2", "co", "o3", etc.
parameters = [
    "pm25",
    "pm10",
]

# Query window (ISO 8601 timestamps with a "Z" suffix).
date_from = "2023-01-01T00:00:00Z"  # start date
date_to = "2023-12-31T23:59:00Z"  # end date

# Default page size used by fetch_openaq_data (its `limit` argument).
limit_per_request = 10_000

def fetch_openaq_data(city, parameter, date_from, date_to, limit=limit_per_request):
    """Download every page of measurements for one pollutant and city.

    Pages of the OpenAQ ``/v2/measurements`` endpoint are requested in
    ascending datetime order (country fixed to "CL") until the page count
    derived from the response metadata is exhausted.

    Args:
        city: City name sent as the ``city`` query parameter.
        parameter: Pollutant identifier sent as ``parameter`` (e.g. "pm25").
        date_from: Start of the query window, sent as ``date_from``.
        date_to: End of the query window, sent as ``date_to``.
        limit: Page size sent as ``limit``.

    Returns:
        A ``pandas.DataFrame`` built from the concatenated ``results`` lists
        of all pages fetched. It is empty when nothing could be downloaded,
        and holds the partial data if a request fails midway.
    """
    # NOTE(review): OpenAQ has been moving clients from v2 to v3 (API key
    # required) -- confirm this endpoint is still served.
    base_url = "https://api.openaq.org/v2/measurements"
    request_timeout = 60  # seconds; without it a stalled connection hangs forever
    all_results = []
    page = 1
    total_pages = 1

    print(f"Descargando datos de {parameter} para {city}...")
    while page <= total_pages:
        params = {
            "city": city,
            "parameter": parameter,
            "date_from": date_from,
            "date_to": date_to,
            "limit": limit,
            "page": page,
            "sort": "asc",
            "order_by": "datetime",
            "country": "CL",
        }

        try:
            response = requests.get(base_url, params=params, timeout=request_timeout)
        except requests.RequestException as exc:
            # Same best-effort policy as a non-200 status: report the problem
            # and keep whatever pages were already downloaded.
            print(f"Error: {exc}")
            break
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            break

        data = response.json()
        results = data.get("results", [])
        all_results.extend(results)

        found = data.get("meta", {}).get("found", 0)
        if isinstance(found, int):
            # Ceiling division: an exact multiple of `limit` must not trigger
            # an extra request for an empty trailing page.
            total_pages = max(1, -(-found // limit))
        else:
            # `found` is not guaranteed to be a plain integer; when it is not,
            # keep paging for as long as pages come back full.
            total_pages = page + 1 if len(results) >= limit else page

        print(f"Página {page}/{total_pages}")
        page += 1
        if page <= total_pages:
            time.sleep(1)  # throttle between requests to avoid overloading the API

    return pd.DataFrame(all_results)

def process_data(df, parameter):
    """Turn raw measurement rows into a per-timestamp mean series.

    The input frame is left untouched; all work happens on new objects.

    Args:
        df: Raw measurements with a ``date`` column of dicts holding a
            ``"utc"`` entry and a numeric ``value`` column.
        parameter: Name given to the resulting value column.

    Returns:
        ``None`` when ``df`` is empty. Otherwise a one-column DataFrame named
        ``parameter``, indexed by ``datetime`` in ascending order, where rows
        sharing a timestamp (e.g. several stations) are averaged.
    """
    if df.empty:
        return None

    # Build the timestamps separately instead of assigning a new column on
    # `df`, which would modify the caller's DataFrame as a side effect.
    timestamps = pd.to_datetime(df["date"].map(lambda entry: entry["utc"]))
    measurements = pd.DataFrame({"datetime": timestamps, parameter: df["value"]})

    # Collapse simultaneous readings into their mean; groupby also returns
    # the result sorted by timestamp.
    return measurements.groupby("datetime")[parameter].mean().to_frame()

# --- DOWNLOAD AND CLEAN EACH PARAMETER ---
data_frames = []
for param in parameters:
    raw_df = fetch_openaq_data(city, param, date_from, date_to)
    clean_df = process_data(raw_df, param)
    # process_data returns None when nothing was downloaded for this parameter.
    if clean_df is not None:
        data_frames.append(clean_df)

# --- MERGE EVERYTHING INTO A SINGLE DATAFRAME ---
if data_frames:
    # Align the per-parameter series side by side on their datetime index.
    df_all = pd.concat(data_frames, axis=1)
    # Hourly means, with gaps filled by linear interpolation.
    # "1h" replaces the deprecated "1H" frequency alias.
    df_all = df_all.resample("1h").mean().interpolate()
    df_all = df_all.reset_index()
    print("\nDatos procesados:")
    print(df_all.head())

    # Save to CSV. The name follows the configured city and start year, so
    # changing `city` no longer produces a file labelled "santiago".
    output_path = f"calidad_aire_{city.lower().replace(' ', '_')}_{date_from[:4]}.csv"
    df_all.to_csv(output_path, index=False)
    print(f"\n✅ Archivo guardado como '{output_path}'")
else:
    print("No se pudo descargar ningún dato.")