<a href="https://colab.research.google.com/github/Gobihanath/Live-Stream-Analysis-GB-NN/blob/main/API_data_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
import csv
import os
import time
from datetime import datetime, timezone, date, timedelta
from google.colab import files

# ============================================================
# CONFIG
# ============================================================
LAT = 6.9271   # Colombo
LON = 79.8612

START_DATE = "2025-11-01"
END_DATE   = "2025-12-07"

LIVE_EVERY_SECONDS = 300  # 5 minutes
CSV_FILE = "air_quality_past_plus_live.csv"

AQ_VARS = [
    "european_aqi",
    "us_aqi",
    "pm10",
    "pm2_5",
    "carbon_monoxide",
    "carbon_dioxide",
    "nitrogen_dioxide",
    "sulphur_dioxide",
    "ozone",
    "ammonia",
    "aerosol_optical_depth",
    "dust",
    "uv_index",
    "uv_index_clear_sky"
]

WX_VARS = [
    "temperature_2m",
    "relative_humidity_2m",
    "wind_speed_10m",
    "wind_direction_10m",
    "surface_pressure"
]

ALL_VARS = AQ_VARS + WX_VARS
BASE_URL = "https://air-quality-api.open-meteo.com/v1/air-quality"
HISTORY_CHUNK_DAYS = 20

# ============================================================
# HELPERS
# ============================================================
def iso_date(s):
    y, m, d = s.split("-")
    return date(int(y), int(m), int(d))

def ensure_csv_header(path, fieldnames):
    if not os.path.isfile(path):
        with open(path, "w", newline="", encoding="utf-8") as f:
            csv.DictWriter(f, fieldnames=fieldnames).writeheader()

def append_rows(path, fieldnames, rows):
    with open(path, "a", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        for r in rows:
            w.writerow(r)

def fetch_hourly_history(lat, lon, start_d, end_d, vars_list):
    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": ",".join(vars_list),
        "start_date": start_d,
        "end_date": end_d,
        "timezone": "auto"
    }
    r = requests.get(BASE_URL, params=params, timeout=30)
    r.raise_for_status()
    data = r.json()

    hourly = data.get("hourly", {})
    times = hourly.get("time", [])
    rows = []
    fetched_at = datetime.now(timezone.utc).isoformat()

    for i, t in enumerate(times):
        row = {"fetched_at": fetched_at, "model_time": t}
        for v in vars_list:
            row[v] = hourly[v][i] if v in hourly else None
        rows.append(row)
    return rows

def fetch_current(lat, lon, vars_list):
    params = {
        "latitude": lat,
        "longitude": lon,
        "current": ",".join(vars_list),
        "timezone": "auto"
    }
    r = requests.get(BASE_URL, params=params, timeout=30)
    r.raise_for_status()
    cur = r.json().get("current", {})

    row = {
        "fetched_at": datetime.now(timezone.utc).isoformat(),
        "model_time": cur.get("time")
    }
    for v in vars_list:
        row[v] = cur.get(v)
    return row

def daterange_chunks(start, end, days):
    cur = start
    while cur <= end:
        chunk_end = min(cur + timedelta(days=days - 1), end)
        yield cur.isoformat(), chunk_end.isoformat()
        cur = chunk_end + timedelta(days=1)

# ============================================================
# MAIN
# ============================================================
def main():
    fieldnames = ["fetched_at", "model_time"] + ALL_VARS
    ensure_csv_header(CSV_FILE, fieldnames)

    # ----------------------------
    # A) Download past data
    # ----------------------------
    print("Downloading past (hourly) data...")

    start_dt = iso_date(START_DATE)
    end_dt = iso_date(END_DATE)

    total = 0
    for s, e in daterange_chunks(start_dt, end_dt, HISTORY_CHUNK_DAYS):
        rows = fetch_hourly_history(LAT, LON, s, e, ALL_VARS)
        append_rows(CSV_FILE, fieldnames, rows)
        total += len(rows)

    print(f"Past data saved ({total} rows).")

    # ----------------------------
    # B) DOWNLOAD CSV NOW
    # ----------------------------
    print("Downloading CSV to your computer...")
    files.download(CSV_FILE)

    # ----------------------------
    # C) Live loop
    # ----------------------------
    print("Starting live data collection (every 5 minutes)...")
    while True:
        try:
            row = fetch_current(LAT, LON, ALL_VARS)
            append_rows(CSV_FILE, fieldnames, [row])
            print("LIVE:", row["fetched_at"], "| model:", row["model_time"])
        except Exception as e:
            print("ERROR:", e)

        time.sleep(LIVE_EVERY_SECONDS)

# Run
main()


Downloading past (hourly) data...
Past data saved (888 rows).
Downloading CSV to your computer...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Starting live data collection (every 5 minutes)...
LIVE: 2025-12-27T04:36:18.996921+00:00 | model: 2025-12-27T09:30


KeyboardInterrupt: 