https://docs.openaq.org/using-the-api/dates-datetimes

In [31]:
from dataclasses import dataclass
from typing import Optional, List, Dict, Any

@dataclass
class Meta:
    """Paging metadata carried under the ``"meta"`` key of an API response."""

    name: str
    website: str
    page: int
    limit: int
    found: int

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Meta":
        """Build a ``Meta`` from a decoded JSON object.

        Keys that are absent from ``data`` come through as ``None`` instead
        of raising; keys other than the five fields are ignored.
        """
        wanted = ("name", "website", "page", "limit", "found")
        return cls(**{key: data.get(key) for key in wanted})

@dataclass
class Country:
    """Country entry of a location record: numeric id, code and name."""

    id: int
    code: str
    name: str

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Country":
        """Build a ``Country`` from ``d``; all three keys are required.

        Raises:
            KeyError: if ``"id"``, ``"code"`` or ``"name"`` is missing.
        """
        ident, code, name = d["id"], d["code"], d["name"]
        return cls(ident, code, name)

@dataclass
class Owner:
    """Owner entry of a location record: numeric id and name."""

    id: int
    name: str

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Owner":
        """Build an ``Owner`` from ``d``.

        Raises:
            KeyError: if ``"id"`` or ``"name"`` is missing.
        """
        return cls(**{field: d[field] for field in ("id", "name")})

@dataclass
class Provider:
    """Provider entry of a location record: numeric id and name."""

    id: int
    name: str

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Provider":
        """Build a ``Provider`` from ``d``.

        Raises:
            KeyError: if ``"id"`` or ``"name"`` is missing.
        """
        # Positional order matches the field declaration order above.
        return cls(d["id"], d["name"])

@dataclass
class Instrument:
    """Instrument entry of a location record: numeric id and name."""

    id: int
    name: str

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Instrument":
        """Build an ``Instrument`` from ``d``.

        Raises:
            KeyError: if ``"id"`` or ``"name"`` is missing.
        """
        instrument_id = d["id"]
        instrument_name = d["name"]
        return cls(id=instrument_id, name=instrument_name)

@dataclass
class Parameter:
    """Measured quantity: numeric id, short name, units and display name.

    NOTE: this file defines ``Parameter`` a second time further down; that
    later definition is the one bound to the name once the whole cell has
    run. This definition treats ``displayName`` as optional, exactly like the
    later one, so both behave the same whichever is in effect.
    """

    id: int
    name: str
    units: str
    displayName: Optional[str]

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Parameter":
        """Build a ``Parameter`` from ``d``.

        ``"id"``, ``"name"`` and ``"units"`` are required; a missing
        ``"displayName"`` becomes ``None``.

        Raises:
            KeyError: if ``"id"``, ``"name"`` or ``"units"`` is missing.
        """
        return cls(
            id=d["id"],
            name=d["name"],
            units=d["units"],
            displayName=d.get("displayName"),
        )

@dataclass
class Sensor:
    """Sensor entry of a location record, including the parameter it reports."""

    id: int
    name: str
    parameter: Parameter

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Sensor":
        """Build a ``Sensor`` from ``d``, parsing the nested ``"parameter"``.

        Raises:
            KeyError: if ``"id"``, ``"name"`` or ``"parameter"`` is missing.
        """
        sensor_id = d["id"]
        sensor_name = d["name"]
        measured = Parameter.from_dict(d["parameter"])
        return cls(sensor_id, sensor_name, measured)

@dataclass
class Coordinates:
    """Latitude/longitude pair of a location record."""

    latitude: float
    longitude: float

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Coordinates":
        """Build ``Coordinates`` from ``d``; values are stored as given.

        Raises:
            KeyError: if ``"latitude"`` or ``"longitude"`` is missing.
        """
        lat, lon = d["latitude"], d["longitude"]
        return cls(lat, lon)

@dataclass
class Attribution:
    """Attribution attached to a license: a name and an optional URL."""

    name: str
    url: Optional[str]

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Attribution":
        """Build an ``Attribution`` from ``d`` without requiring any key.

        A missing ``"name"`` becomes the empty string and a missing ``"url"``
        becomes ``None``.
        """
        label = d.get("name", "")
        link = d.get("url")
        return cls(label, link)

@dataclass
class License:
    """License entry of a location record, with its attribution and validity dates."""

    id: int
    name: str
    attribution: Attribution
    dateFrom: Optional[str]
    dateTo: Optional[str]

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "License":
        """Build a ``License`` from ``d``, parsing the nested ``"attribution"``.

        ``"dateFrom"`` and ``"dateTo"`` default to ``None`` when absent.

        Raises:
            KeyError: if ``"id"``, ``"name"`` or ``"attribution"`` is missing.
        """
        license_id, license_name = d["id"], d["name"]
        credit = Attribution.from_dict(d["attribution"])
        return cls(
            license_id,
            license_name,
            credit,
            d.get("dateFrom"),
            d.get("dateTo"),
        )

@dataclass
class DateTimePair:
    """One instant given twice: under a ``utc`` key and under a ``local`` key."""

    utc: str
    local: str

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "DateTimePair":
        """Build a ``DateTimePair`` from ``d``; values are stored unparsed.

        Raises:
            KeyError: if ``"utc"`` or ``"local"`` is missing.
        """
        return cls(**{zone: d[zone] for zone in ("utc", "local")})

@dataclass
class LocationResult:
    """One entry of the ``"results"`` list returned for a locations request."""

    id: int
    name: str
    locality: Optional[str]
    timezone: Optional[str]
    country: Country
    owner: Owner
    provider: Provider
    isMobile: bool
    isMonitor: bool
    instruments: List[Instrument]
    sensors: List[Sensor]
    coordinates: Coordinates
    licenses: List[License]
    bounds: Optional[List[float]]
    distance: Optional[float]
    datetimeFirst: Optional[DateTimePair]
    datetimeLast: Optional[DateTimePair]

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "LocationResult":
        """Build a ``LocationResult`` from one decoded result object.

        ``"id"``, ``"name"``, ``"country"``, ``"owner"``, ``"provider"`` and
        ``"coordinates"`` are required. The list-valued keys
        (``"instruments"``, ``"sensors"``, ``"licenses"``) become an empty
        list when the key is missing *or* when its value is ``None`` (JSON
        ``null``); previously a ``null`` value raised ``TypeError`` while
        iterating. ``"datetimeFirst"`` / ``"datetimeLast"`` become ``None``
        when missing or falsy.

        Raises:
            KeyError: if a required key is missing, here or in a nested object.
        """
        first_seen = d.get("datetimeFirst")
        last_seen = d.get("datetimeLast")
        return cls(
            id=d["id"],
            name=d["name"],
            locality=d.get("locality"),
            timezone=d.get("timezone"),
            country=Country.from_dict(d["country"]),
            owner=Owner.from_dict(d["owner"]),
            provider=Provider.from_dict(d["provider"]),
            isMobile=d.get("isMobile", False),
            isMonitor=d.get("isMonitor", False),
            # `or []` also covers an explicit null, which .get(key, []) does not.
            instruments=[Instrument.from_dict(x) for x in d.get("instruments") or []],
            sensors=[Sensor.from_dict(x) for x in d.get("sensors") or []],
            coordinates=Coordinates.from_dict(d["coordinates"]),
            licenses=[License.from_dict(x) for x in d.get("licenses") or []],
            bounds=d.get("bounds"),
            distance=d.get("distance"),
            datetimeFirst=DateTimePair.from_dict(first_seen) if first_seen else None,
            datetimeLast=DateTimePair.from_dict(last_seen) if last_seen else None,
        )
    
@dataclass
class Parameter:
    """Measured quantity: numeric id, short name, units, optional display name."""

    id: int
    name: str
    units: str
    displayName: Optional[str]

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Parameter":
        """Build a ``Parameter`` from ``d``.

        A missing ``"displayName"`` becomes ``None``.

        Raises:
            KeyError: if ``"id"``, ``"name"`` or ``"units"`` is missing.
        """
        required = {key: d[key] for key in ("id", "name", "units")}
        return cls(displayName=d.get("displayName"), **required)

@dataclass
class Period:
    """Time span of a measurement: label, interval and start/end instants."""

    label: str
    interval: str
    datetimeFrom: DateTimePair
    datetimeTo: DateTimePair

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Period":
        """Build a ``Period`` from ``d``, parsing both nested datetime pairs.

        Raises:
            KeyError: if any of the four keys is missing.
        """
        label, interval = d["label"], d["interval"]
        start = DateTimePair.from_dict(d["datetimeFrom"])
        end = DateTimePair.from_dict(d["datetimeTo"])
        return cls(label, interval, start, end)

@dataclass
class Coverage:
    """Expected vs. observed counts and completeness figures for a measurement."""

    expectedCount: int
    expectedInterval: str
    observedCount: int
    observedInterval: str
    percentComplete: float
    percentCoverage: float
    datetimeFrom: DateTimePair
    datetimeTo: DateTimePair

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Coverage":
        """Build a ``Coverage`` from ``d``; every key is required.

        Raises:
            KeyError: if any field's key is missing.
        """
        plain_keys = (
            "expectedCount",
            "expectedInterval",
            "observedCount",
            "observedInterval",
            "percentComplete",
            "percentCoverage",
        )
        # Plain values are copied as-is; only the two datetime pairs need parsing.
        plain = {key: d[key] for key in plain_keys}
        start = DateTimePair.from_dict(d["datetimeFrom"])
        end = DateTimePair.from_dict(d["datetimeTo"])
        return cls(datetimeFrom=start, datetimeTo=end, **plain)

@dataclass
class FlagInfo:
    """Flag summary of a measurement; currently just the ``hasFlags`` boolean."""

    hasFlags: bool

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "FlagInfo":
        """Build a ``FlagInfo`` from ``d``.

        Raises:
            KeyError: if ``"hasFlags"`` is missing.
        """
        has_flags = d["hasFlags"]
        return cls(has_flags)

@dataclass
class Measurement:
    """One measured value with its flags, parameter, period and coverage."""

    value: float
    flagInfo: FlagInfo
    parameter: Parameter
    period: Period
    coordinates: Optional[Dict[str, float]]
    summary: Optional[str]
    coverage: Coverage

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "Measurement":
        """Build a ``Measurement`` from ``d``, parsing the nested objects.

        ``"coordinates"`` and ``"summary"`` default to ``None`` when absent
        and are stored as given; every other key is required.

        Raises:
            KeyError: if a required key is missing, here or in a nested object.
        """
        reading = d["value"]
        flags = FlagInfo.from_dict(d["flagInfo"])
        measured = Parameter.from_dict(d["parameter"])
        span = Period.from_dict(d["period"])
        completeness = Coverage.from_dict(d["coverage"])
        return cls(
            value=reading,
            flagInfo=flags,
            parameter=measured,
            period=span,
            coordinates=d.get("coordinates"),
            summary=d.get("summary"),
            coverage=completeness,
        )


In [32]:

import os
import requests
import json

# Fetch one OpenAQ location record (id 9369) and parse it into dataclasses.

# The API key is read from a file so it never appears in the notebook itself.
# NOTE: the path contains no "~", so expanduser() returns it unchanged and it
# is resolved relative to the current working directory.
KEY_PATH = os.path.expanduser("notebooks/openaq_api_key")
with open(KEY_PATH, "r") as f:
    # strip() removes the trailing newline most editors add to the key file.
    API_KEY = f.read().strip()

# The key is sent in the X-API-Key request header.
headers = {"X-API-Key": API_KEY}

url = "https://api.openaq.org/v3/locations/9369"
resp = requests.get(url, headers=headers, timeout=30)
# Turn 4xx/5xx responses into an exception before trying to decode the body.
resp.raise_for_status()

data = resp.json()
# ensure_ascii=False keeps characters such as "µ" and "³" readable in the dump.
print(json.dumps(data, indent=2, ensure_ascii=False))

meta = Meta.from_dict(data["meta"])
print(meta)

# A missing "results" key is treated as "no locations".
locations: List[LocationResult] = [LocationResult.from_dict(x) for x in data.get("results", [])]
print(locations)

{
  "meta": {
    "name": "openaq-api",
    "website": "/",
    "page": 1,
    "limit": 100,
    "found": 1
  },
  "results": [
    {
      "id": 9369,
      "name": "IS-1",
      "locality": "IASI",
      "timezone": "Europe/Bucharest",
      "country": {
        "id": 74,
        "code": "RO",
        "name": "Romania"
      },
      "owner": {
        "id": 4,
        "name": "Unknown Governmental Organization"
      },
      "provider": {
        "id": 70,
        "name": "EEA"
      },
      "isMobile": false,
      "isMonitor": true,
      "instruments": [
        {
          "id": 2,
          "name": "Government Monitor"
        }
      ],
      "sensors": [
        {
          "id": 7851678,
          "name": "co µg/m³",
          "parameter": {
            "id": 4,
            "name": "co",
            "units": "µg/m³",
            "displayName": "CO mass"
          }
        },
        {
          "id": 28602,
          "name": "no2 µg/m³",
          "parameter": {
         

In [33]:
# List every sensor of each fetched location together with its parameter.
for loc in locations:
    print(f"\nLocation: {loc.name} ({loc.locality})")
    for sensor in loc.sensors:
        # A single print call with a newline separator emits the same four
        # lines as four consecutive print calls.
        print(
            "--------------------------",
            f"  Sensor ID: {sensor.id}",
            f"  Name: {sensor.name}",
            f"  Parameter: {sensor.parameter.displayName} ({sensor.parameter.units})",
            sep="\n",
        )


Location: IS-1 (IASI)
--------------------------
  Sensor ID: 7851678
  Name: co µg/m³
  Parameter: CO mass (µg/m³)
--------------------------
  Sensor ID: 28602
  Name: no2 µg/m³
  Parameter: NO₂ mass (µg/m³)
--------------------------
  Sensor ID: 1853201
  Name: pm10 µg/m³
  Parameter: PM10 (µg/m³)
--------------------------
  Sensor ID: 1853199
  Name: pm25 µg/m³
  Parameter: PM2.5 (µg/m³)
--------------------------
  Sensor ID: 7851681
  Name: so2 µg/m³
  Parameter: SO₂ mass (µg/m³)


In [43]:
# Download the archived csv.gz files of location 9369 for the years 2020..2025
# (the range end is exclusive) from the openaq-data-archive S3 bucket into
# data/openaq/9369/<year>.
# IPython-only syntax: "!" hands the line to the system shell and "{y}" is
# replaced with the value of the Python loop variable. --no-sign-request tells
# the AWS CLI to send the requests unsigned, so no credentials are needed.
for y in range(2020, 2026):
    !aws s3 cp \
        --no-sign-request \
        --recursive \
        s3://openaq-data-archive/records/csv.gz/locationid=9369/year={y}/ \
        data/openaq/9369/{y}

download: s3://openaq-data-archive/records/csv.gz/locationid=9369/year=2020/month=07/location-9369-20200730.csv.gz to data/openaq/9369/2020/month=07/location-9369-20200730.csv.gz
download: s3://openaq-data-archive/records/csv.gz/locationid=9369/year=2020/month=08/location-9369-20200805.csv.gz to data/openaq/9369/2020/month=08/location-9369-20200805.csv.gz
download: s3://openaq-data-archive/records/csv.gz/locationid=9369/year=2020/month=08/location-9369-20200804.csv.gz to data/openaq/9369/2020/month=08/location-9369-20200804.csv.gz
download: s3://openaq-data-archive/records/csv.gz/locationid=9369/year=2020/month=08/location-9369-20200807.csv.gz to data/openaq/9369/2020/month=08/location-9369-20200807.csv.gz
download: s3://openaq-data-archive/records/csv.gz/locationid=9369/year=2020/month=08/location-9369-20200812.csv.gz to data/openaq/9369/2020/month=08/location-9369-20200812.csv.gz
download: s3://openaq-data-archive/records/csv.gz/locationid=9369/year=2020/month=08/location-9369-202008

In [45]:
from pathlib import Path
import pandas as pd

# Merge every downloaded per-day csv.gz file of location 9369 into one CSV,
# streaming in chunks so the full dataset is never held in memory at once.
BASE = Path("data/openaq/9369")
YEARS = range(2020, 2026)  # 2020..2025 inclusive (the range end is exclusive)
OUT = "data/combined_openaq_9369_2020_2025.csv"

CHUNKSIZE = 100_000  # rows per chunk read from each input file
# True until the first chunk is written: that chunk creates/truncates OUT and
# writes the header row; every later chunk is appended without a header.
first_write = True

for y in YEARS:
    year_dir = BASE / str(y)
    if not year_dir.exists():
        # Nothing was downloaded for this year; no "Finished" line is printed.
        continue

    # find all gzipped CSVs recursively (files sit in month=MM subfolders);
    # sorting the paths gives a stable merge order
    files = sorted(year_dir.rglob("*.csv.gz"))
    for f in files:
        # stream in chunks
        for chunk in pd.read_csv(f, compression="gzip", chunksize=CHUNKSIZE, low_memory=False):
            chunk.to_csv(
                OUT,
                mode="w" if first_write else "a",
                index=False,
                header=first_write
            )
            if first_write:
                first_write = False
    print(f"Finished year {y}, {len(files)} files merged.")

print(f"Done. Combined CSV saved at {OUT}")

Finished year 2020, 143 files merged.
Finished year 2021, 336 files merged.
Finished year 2022, 92 files merged.
Finished year 2023, 0 files merged.
Finished year 2024, 306 files merged.
Finished year 2025, 223 files merged.
Done. Combined CSV saved at data/combined_openaq_9369_2020_2025.csv


In [49]:
# Settings for downloading NOAA ISD-Lite yearly files for one station.
BASE_URL = "https://www.ncei.noaa.gov/pub/data/noaa/isd-lite"
STATION_USAF = "150900"          # Iași (LRIA) WMO/USAF code
WBAN_CANDIDATES = ["99999", "00000"]  # try both, some stations use 00000
YEARS = range(2020, 2026)        # inclusive 2020..2025
OUTDIR = Path("data/meteo/isd-lite/150900")
# Create the download directory (and any missing parents) up front.
OUTDIR.mkdir(parents=True, exist_ok=True)
TIMEOUT = 60  # passed as `timeout` to every requests call below (seconds)

def download_isd_lite_year(year: int) -> bool:
    """
    Download the station's ISD-Lite file for ``year`` unless it is already on disk.

    For each WBAN in ``WBAN_CANDIDATES`` the file name is
    ``<STATION_USAF>-<WBAN>-<year>.gz`` under ``BASE_URL/<year>/``; the file
    is stored in ``OUTDIR`` under the same name.

    Args:
        year: Four-digit year to fetch.

    Returns:
        True if a non-empty file already exists or was downloaded, False if
        no candidate could be fetched. Network errors are reported with a
        ``[warn]`` line and never raised.
    """
    candidates = []
    for wban in WBAN_CANDIDATES:
        fname = f"{STATION_USAF}-{wban}-{year}.gz"
        candidates.append((f"{BASE_URL}/{year}/{fname}", OUTDIR / fname))

    # Look at every candidate on disk BEFORE touching the network, so a file
    # cached under a later WBAN does not cost a request for the earlier ones.
    for _, out in candidates:
        if out.exists() and out.stat().st_size > 0:
            print(f"[skip] {out} (exists)")
            return True

    for url, out in candidates:
        try:
            # HEAD first (fast 404 check), then GET if exists. requests.head
            # does not follow redirects unless asked, and a redirected file
            # would otherwise look missing.
            h = requests.head(url, timeout=TIMEOUT, allow_redirects=True)
            if h.status_code != 200:
                if h.status_code != 404:
                    # Not a plain "no such file": report it instead of
                    # silently treating it as missing.
                    print(f"[warn] {url} -> HTTP {h.status_code}; trying next candidate...")
                continue
            print(f"[get ] {url}")
            r = requests.get(url, timeout=TIMEOUT)
            r.raise_for_status()
        except requests.RequestException as e:
            print(f"[warn] {url} -> {e}; trying next candidate...")
            continue

        # Write to a side file and rename, so an interrupted write cannot
        # leave a truncated file that the exists-check above would accept.
        tmp = out.with_name(out.name + ".part")
        tmp.write_bytes(r.content)
        tmp.replace(out)
        print(f"[save] {out} ({out.stat().st_size:,} bytes)")
        return True

    print(f"[miss] No file found for year {year} (tried WBAN={WBAN_CANDIDATES})")
    return False

# Fetch every configured year when this code runs as the main module; the
# guard keeps an import of this code from starting downloads.
if __name__ == "__main__":
    for y in YEARS:
        download_isd_lite_year(y)

[skip] data/meteo/isd-lite/150900/150900-99999-2020.gz (exists)
[skip] data/meteo/isd-lite/150900/150900-99999-2021.gz (exists)
[skip] data/meteo/isd-lite/150900/150900-99999-2022.gz (exists)
[skip] data/meteo/isd-lite/150900/150900-99999-2023.gz (exists)
[skip] data/meteo/isd-lite/150900/150900-99999-2024.gz (exists)
[skip] data/meteo/isd-lite/150900/150900-99999-2025.gz (exists)


In [50]:
INDIR = Path("data/meteo/isd-lite/150900")
OUTFILE = Path("data/combined_isdlite_150900_2020_2025.csv")

# Fixed-width layout of an ISD-Lite record (0-based, end-exclusive offsets).
# The four date fields are separated by single spaces; every following field
# is exactly 6 characters wide and right-aligned, starting at offset 13.
# The 12th field (six-hour precipitation, offsets 55-61) is not read.
colspecs = [
    (0, 4),    # year
    (5, 7),    # month
    (8, 10),   # day
    (11, 13),  # hour
    (13, 19),  # air temperature            (stored x10)
    (19, 25),  # dew point                  (stored x10)
    (25, 31),  # sea level pressure         (stored x10)
    (31, 37),  # wind direction             (degrees, unscaled)
    (37, 43),  # wind speed                 (stored x10)
    (43, 49),  # sky cover                  (code, unscaled)
    (49, 55),  # one-hour precipitation     (stored x10)
]
names = [
    "year","month","day","hour",
    "temp_C","dewpoint_C","slp_hPa",
    "wind_dir_deg","wind_speed_ms","sky_cover","precip_mm"
]

# Every ISD-Lite field uses -9999 to mean "no observation".
ISD_LITE_MISSING = -9999
# Columns whose raw integers are ten times the physical value.
ISD_LITE_SCALED = ["temp_C", "dewpoint_C", "slp_hPa", "wind_speed_ms", "precip_mm"]


def read_isd_lite_file(path):
    """Read one gzipped ISD-Lite file into a DataFrame indexed by datetime.

    Args:
        path: Path of a gzip-compressed ISD-Lite file.

    Returns:
        DataFrame indexed by ``datetime`` (built from the year/month/day/hour
        fields) with the columns ``names[4:]``. The missing-value sentinel
        -9999 is replaced by NaN and the x10-scaled columns are divided by
        10, so the values match the units in the column names.

    NOTE: trace precipitation is coded as -1 in the raw file and therefore
    shows up as -0.1 after scaling; it is left as-is.
    """
    df = pd.read_fwf(path, colspecs=colspecs, names=names, header=None, compression="gzip")
    # Build datetime from components
    df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]], errors="coerce")
    df = df.set_index("datetime").drop(columns=["year", "month", "day", "hour"])
    # Drop the sentinel before scaling so it cannot turn into -999.9.
    df = df.mask(df == ISD_LITE_MISSING)
    df[ISD_LITE_SCALED] = df[ISD_LITE_SCALED] / 10.0
    return df


frames = []
for f in sorted(INDIR.glob("*.gz")):
    print(f"Reading {f.name} ...")
    frames.append(read_isd_lite_file(f))

if frames:
    # Concatenate all years
    combined = pd.concat(frames).sort_index()

    # Save as single CSV
    combined.to_csv(OUTFILE)
    print(f"Saved combined file -> {OUTFILE} with {len(combined):,} rows")
else:
    # pd.concat raises on an empty list; report the situation instead.
    combined = pd.DataFrame(columns=names[4:])
    print(f"[warn] No .gz files found in {INDIR}; nothing written")

Reading 150900-99999-2020.gz ...
Reading 150900-99999-2021.gz ...
Reading 150900-99999-2022.gz ...
Reading 150900-99999-2023.gz ...
Reading 150900-99999-2024.gz ...
Reading 150900-99999-2025.gz ...
Saved combined file -> data/combined_isdlite_150900_2020_2025.csv with 48,674 rows
