In [1]:
import os
from datetime import date, timedelta
from pathlib import Path

import folium
import pandas as pd
import requests
from folium.plugins import HeatMap


In [2]:
# The OpenAQ key is read from the environment so the secret never lives in the
# notebook or in version control. Export OPENAQ_API_KEY before starting the
# kernel. NOTE(review): the key that used to be hardcoded here has been
# exposed and should be revoked/rotated.
API_KEY = os.environ.get("OPENAQ_API_KEY", "")
BASE_URL = "https://api.openaq.org/v3"

COUNTRIES = ["EE", "LV", "LT"]  # Baltics only


In [3]:
def get_locations_for_country(iso_code, limit=100, page=1, timeout=30):
    """Fetch one page of OpenAQ locations for a single country.

    Parameters
    ----------
    iso_code : str
        Country code sent as the ``iso`` query parameter (e.g. ``"EE"``).
    limit : int, default 100
        Page size sent as the ``limit`` query parameter.
    page : int, default 1
        Page number sent as the ``page`` query parameter.
    timeout : float or tuple, default 30
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without it a stalled connection would block the kernel indefinitely.

    Returns
    -------
    dict
        The decoded JSON response body.

    Raises
    ------
    requests.HTTPError
        If the response status is 4xx/5xx (via ``raise_for_status``).
    requests.Timeout
        If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f"{BASE_URL}/locations",
        params={"iso": iso_code, "limit": limit, "page": page},
        headers={"X-API-Key": API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


In [4]:
def locations_to_dataframe(json_obj, iso_code):
    """Flatten a locations response into one row per (location, sensor).

    Parameters
    ----------
    json_obj : dict
        Decoded locations payload; locations are read from its ``"results"``
        list. A missing or null ``"results"`` is treated as no locations.
    iso_code : str
        Country code written verbatim into the ``country_code`` column.

    Returns
    -------
    pandas.DataFrame
        Columns: location_id, location_name, country_code, locality,
        timezone, latitude, longitude, sensor_id, parameter, units.
        The columns are present even when there are no rows, so callers can
        select them unconditionally. Locations without sensors produce no
        rows; locations without coordinates get missing latitude/longitude.

    Raises
    ------
    KeyError
        If a location lacks ``"id"`` or a sensor lacks ``"id"`` /
        ``"parameter"`` (these fields are treated as mandatory).
    """
    columns = [
        "location_id",
        "location_name",
        "country_code",
        "locality",
        "timezone",
        "latitude",
        "longitude",
        "sensor_id",
        "parameter",
        "units",
    ]
    rows = []
    # `or []` / `or {}` also cover keys that are present but explicitly null.
    for loc in json_obj.get("results") or []:
        coords = loc.get("coordinates") or {}
        for sensor in loc.get("sensors") or []:
            rows.append(
                {
                    "location_id": loc["id"],
                    "location_name": loc.get("name"),
                    "country_code": iso_code,
                    "locality": loc.get("locality"),
                    "timezone": loc.get("timezone"),
                    "latitude": coords.get("latitude"),
                    "longitude": coords.get("longitude"),
                    "sensor_id": sensor["id"],
                    "parameter": sensor["parameter"]["name"],
                    "units": sensor["parameter"]["units"],
                }
            )
    return pd.DataFrame(rows, columns=columns)


In [5]:
def get_daily_measurements_for_sensor(sensor_id, date_from, date_to, limit=1000, page=1, timeout=30):
    """Fetch one page of daily aggregates for a sensor.

    Calls ``{BASE_URL}/sensors/{sensor_id}/days``.

    Parameters
    ----------
    sensor_id : int or str
        Sensor identifier interpolated into the URL path.
    date_from, date_to : str
        Window bounds sent as the ``date_from`` / ``date_to`` query
        parameters (callers in this notebook pass ISO dates).
    limit : int, default 1000
        Page size sent as ``limit``.
    page : int, default 1
        Page number sent as ``page``.
    timeout : float or tuple, default 30
        Seconds to wait for the server, passed to ``requests.get`` so a
        stalled connection cannot hang the per-sensor loop.

    Returns
    -------
    dict
        The decoded JSON response body.

    Raises
    ------
    requests.HTTPError
        If the response status is 4xx/5xx (via ``raise_for_status``).
    requests.Timeout
        If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f"{BASE_URL}/sensors/{sensor_id}/days",
        params={
            "date_from": date_from,
            "date_to": date_to,
            "limit": limit,
            "page": page,
        },
        headers={"X-API-Key": API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


In [6]:
def days_to_dataframe(json_obj, sensor_id):
    """Turn a daily-aggregates payload into a tidy DataFrame.

    Each entry of ``json_obj["results"]`` (absent key -> no entries) becomes
    one row with the columns ``sensor_id``, ``parameter``, ``units``,
    ``date_local`` and ``avg_value``. ``date_local`` is the first ten
    characters of the period's local start timestamp, falling back to the
    local end timestamp, or ``None`` when neither is available.
    """

    def _row(entry):
        # Any level of the period structure may be missing or null.
        period = entry.get("period") or {}
        starts = (period.get("datetimeFrom") or {}).get("local")
        ends = (period.get("datetimeTo") or {}).get("local")
        stamp = starts or ends
        day = stamp[:10] if stamp else None
        return {
            "sensor_id": sensor_id,
            "parameter": entry["parameter"]["name"],
            "units": entry["parameter"]["units"],
            "date_local": day,
            "avg_value": entry["value"],
        }

    return pd.DataFrame([_row(entry) for entry in json_obj.get("results", [])])


In [7]:
PAGE_SIZE = 100  # locations requested per API page

all_locations = []

for iso in COUNTRIES:
    page = 1
    while True:
        data = get_locations_for_country(iso, limit=PAGE_SIZE, page=page)
        # Decide when to stop from the number of *locations* on the page.
        # The flattened frame has one row per sensor, so its length says
        # nothing about whether the page was full.
        n_locations = len(data.get("results") or [])
        if n_locations == 0:
            break
        page_df = locations_to_dataframe(data, iso)
        if not page_df.empty:
            all_locations.append(page_df)
        if n_locations < PAGE_SIZE:
            break  # short page -> last page
        page += 1

if not all_locations:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise RuntimeError(f"No locations with sensors were returned for {COUNTRIES}")

locations_df = pd.concat(all_locations, ignore_index=True)

# One row per PM2.5 sensor (parameter names are matched case-insensitively).
pm25_baltics = locations_df[
    locations_df["parameter"].str.lower() == "pm25"
].drop_duplicates(subset=["sensor_id"])

pm25_baltics.shape


(28, 10)

In [8]:
# Query window as ISO date strings: yesterday (start) through today (end),
# both taken from the machine's local calendar date.
today = date.today()
date_from, date_to = (
    (today - timedelta(days=days_back)).isoformat() for days_back in (1, 0)
)
date_from, date_to


('2025-11-21', '2025-11-22')

In [9]:
# Distinct PM2.5 sensor ids, in first-seen order, as a NumPy array.
sensor_ids = pm25_baltics["sensor_id"].drop_duplicates().to_numpy()
len(sensor_ids)


28

In [10]:
all_rows = []            # one dict per sensor: its most recent daily value
failed_sensor_ids = []   # sensors skipped because of an error, for follow-up

for sid in sensor_ids:
    try:
        data = get_daily_measurements_for_sensor(sid, date_from, date_to)
        sensor_days = days_to_dataframe(data, sid)
    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        # Best effort: one failing sensor (HTTP error, timeout, malformed
        # payload) must not abort the whole snapshot. Anything outside these
        # expected failure modes is allowed to surface.
        print(f"Error fetching sensor {sid}: {e}")
        failed_sensor_ids.append(sid)
        continue

    if sensor_days.empty:
        continue

    # Take the most recent row (there will usually be 1). Rows without a
    # date sort first so they can never be mistaken for the latest one.
    latest = sensor_days.sort_values("date_local", na_position="first").iloc[-1]
    all_rows.append(latest.to_dict())

# Building from dicts gives a clean 0..n-1 index (stacking the row Series
# left every row labelled 0), and the explicit columns keep the schema
# intact even when no sensor returned data.
latest_pm25 = pd.DataFrame(
    all_rows,
    columns=["sensor_id", "parameter", "units", "date_local", "avg_value"],
)
latest_pm25.head()


Unnamed: 0,sensor_id,parameter,units,date_local,avg_value
0,23435,pm25,µg/m³,2025-11-21,7.05
0,23453,pm25,µg/m³,2025-11-21,7.14
0,8613037,pm25,µg/m³,2025-11-21,6.8
0,23448,pm25,µg/m³,2025-11-21,7.19
0,35347,pm25,µg/m³,2025-11-21,7.26


In [11]:
# Sensor metadata, one row per sensor id, used to attach location details
# to each sensor's latest reading.
pm25_meta = pm25_baltics.drop_duplicates(subset=["sensor_id"])

# Left join: every reading is kept even if its sensor has no metadata row.
latest_pm25_full = pd.merge(
    latest_pm25,
    pm25_meta.loc[
        :,
        ["sensor_id", "country_code", "locality", "location_name", "latitude", "longitude"],
    ],
    how="left",
    on="sensor_id",
)

latest_pm25_full.head()


Unnamed: 0,sensor_id,parameter,units,date_local,avg_value,country_code,locality,location_name,latitude,longitude
0,23435,pm25,µg/m³,2025-11-21,7.05,EE,Saare,"""Saarejärve""",58.702778,26.758898
1,23453,pm25,µg/m³,2025-11-21,7.14,EE,Kohtla-Järve,"""Kohtla-Järve""",59.409725,27.278622
2,8613037,pm25,µg/m³,2025-11-21,6.8,EE,Vihula,"""Lahemaa""",59.494446,25.930569
3,23448,pm25,µg/m³,2025-11-21,7.19,EE,Tallinn,"""Õismäe""",59.414169,24.649458
4,35347,pm25,µg/m³,2025-11-21,7.26,EE,Nõo,"""Tartu""",58.370556,26.734722


In [12]:
# Drop any rows with missing coordinates or values
heat_df = latest_pm25_full.dropna(subset=["latitude", "longitude", "avg_value"])

# Keep plausible readings only: negative concentrations are sensor errors and
# the upper cap stops one crazy sensor from dominating. These are the same
# bounds the later filtering cell applies.
heat_df = heat_df[heat_df["avg_value"].between(0, 200)].copy()

# [lat, lon, value] triples for HeatMap, built in one vectorised step.
heat_data = heat_df[["latitude", "longitude", "avg_value"]].values.tolist()

# Center somewhere in the Baltics
m = folium.Map(location=[56.8, 24.0], zoom_start=5)

HeatMap(
    heat_data,
    radius=18,
    blur=25,
    max_zoom=6
).add_to(m)

map_path = "../data/raw/baltics_pm25_heatmap_latest.html"
# Make sure the output folder exists so saving works on a fresh checkout.
Path(map_path).parent.mkdir(parents=True, exist_ok=True)
m.save(map_path)
map_path


'../data/raw/baltics_pm25_heatmap_latest.html'

In [13]:
import numpy as np
import folium
from folium.plugins import HeatMap

# Start from the latest snapshot table and keep only plottable rows:
#   1. valid coordinates and a value,
#   2. values inside the 0-200 safety range (removes obviously bad readings),
#   3. the countries configured in COUNTRIES (just in case).
# The copy is taken last so `df` is an independent frame, not a filtered
# slice, when later cells add columns to it.
df = (
    latest_pm25_full
    .dropna(subset=["latitude", "longitude", "avg_value"])
    .loc[lambda d: d["avg_value"].between(0, 200)]
    .loc[lambda d: d["country_code"].isin(COUNTRIES)]
    .copy()
)

print(df.shape)
df.head()


(20, 10)


Unnamed: 0,sensor_id,parameter,units,date_local,avg_value,country_code,locality,location_name,latitude,longitude
0,23435,pm25,µg/m³,2025-11-21,7.05,EE,Saare,"""Saarejärve""",58.702778,26.758898
1,23453,pm25,µg/m³,2025-11-21,7.14,EE,Kohtla-Järve,"""Kohtla-Järve""",59.409725,27.278622
2,8613037,pm25,µg/m³,2025-11-21,6.8,EE,Vihula,"""Lahemaa""",59.494446,25.930569
3,23448,pm25,µg/m³,2025-11-21,7.19,EE,Tallinn,"""Õismäe""",59.414169,24.649458
4,35347,pm25,µg/m³,2025-11-21,7.26,EE,Nõo,"""Tartu""",58.370556,26.734722


In [20]:
# Use the 95th percentile as "max" to avoid one extreme point dominating
p95 = df["avg_value"].quantile(0.95)

if pd.notna(p95) and p95 > 0:
    # clip at 95th percentile, then compress using sqrt -> weights in [0, 1]
    df["weight"] = np.sqrt(df["avg_value"].clip(upper=p95) / p95)
else:
    # Empty frame or all-zero readings: dividing by p95 would yield NaN/inf
    # weights, so fall back to a flat zero weight instead.
    df["weight"] = 0.0


# Data for HeatMap: [lat, lon, weight]
heat_data = df[["latitude", "longitude", "weight"]].values.tolist()
len(heat_data)


20

In [23]:
# Center roughly in the middle of the Baltic states
m = folium.Map(
    location=[56.8, 24.5],  # between Riga and Kaunas
    zoom_start=6,
    tiles="cartodbpositron"  # cleaner background
)

# Optional: fit bounds tightly around Baltics (south-west, north-east corners)
baltic_bounds = [[53.8, 19.0],  # SW approx (Poland/N Baltic coast)
                 [60.5, 30.5]]  # NE approx (north of Estonia)
m.fit_bounds(baltic_bounds)

HeatMap(
    heat_data,
    radius=50,      # was 40
    blur=35,        # was 30
    min_opacity=0.3,
    max_zoom=7
).add_to(m)



<folium.plugins.heat_map.HeatMap at 0x18b424060b0>

In [24]:
# Bare expression: shows the map object as this cell's output.
m


In [25]:
# Build a custom legend (static ranges): a fixed-position white box in the
# top-right corner with four colour swatches and their PM2.5 ranges.
# NOTE(review): the ranges and colours are hard-coded here and are not derived
# from the `weight` values given to HeatMap -- confirm they actually match the
# rendered gradient before relying on the legend.
legend_html = """
<div style="position: fixed; top: 50px; right: 50px; z-index: 9999;
            background-color: white; padding: 10px 15px;
            border: 2px solid #444; border-radius: 5px;
            box-shadow: 0 0 15px rgba(0,0,0,0.3); font-size: 13px;">
  <b>PM2.5 (µg/m³)</b><br>
  <div style="margin-top: 5px;">
    <span style="display:inline-block;width:18px;height:10px;background:#00ff00;
                 margin-right:6px;border:1px solid #000;"></span>
    0 – 5 (very low)<br>
    <span style="display:inline-block;width:18px;height:10px;background:#7fff00;
                 margin-right:6px;border:1px solid #000;"></span>
    5 – 15 (moderate)<br>
    <span style="display:inline-block;width:18px;height:10px;background:#ff8000;
                 margin-right:6px;border:1px solid #000;"></span>
    15 – 35 (high)<br>
    <span style="display:inline-block;width:18px;height:10px;background:#ff0000;
                 margin-right:6px;border:1px solid #000;"></span>
    > 35 (very high)
  </div>
</div>
"""


# Attach the raw HTML to the map's root document so it is part of the page
# when the map is displayed or saved.
# NOTE(review): presumably re-running this cell on the same `m` adds a second
# copy of the legend -- rebuild the map first if that happens.
from folium import Element
m.get_root().html.add_child(Element(legend_html))


<branca.element.Element at 0x18b4242cb80>

In [26]:
# Write the map to a standalone HTML file. This is the same path the first
# heatmap was saved to, so that file is overwritten.
map_path = "../data/raw/baltics_pm25_heatmap_latest.html"
# Make sure the output folder exists so saving works on a fresh checkout.
Path(map_path).parent.mkdir(parents=True, exist_ok=True)
m.save(map_path)
map_path


'../data/raw/baltics_pm25_heatmap_latest.html'

In [27]:
# Summary statistics of the filtered PM2.5 values (count, mean, std, quartiles, min/max).
df["avg_value"].describe()


count     20.000000
mean      20.724500
std       35.248569
min        6.060000
25%        7.242500
50%       12.250000
75%       15.125000
max      166.000000
Name: avg_value, dtype: float64

In [29]:
import requests
import pandas as pd

# Read the OpenAQ key from the environment; never commit it in the notebook.
# NOTE(review): the key previously hardcoded here has been exposed and should
# be revoked/rotated.
API_KEY = os.environ.get("OPENAQ_API_KEY", "")
BASE_URL = "https://api.openaq.org/v3"

def get_locations(city_name="Riga", timeout=30):
    """Fetch up to 100 locations, passing ``city_name`` as the ``city`` filter.

    NOTE(review): the output recorded for this notebook lists locations
    outside Riga (e.g. London), which suggests the API does not honour a
    ``city`` query parameter -- confirm against the API reference before
    trusting the result as city-specific.

    Parameters
    ----------
    city_name : str, default "Riga"
        Value sent as the ``city`` query parameter.
    timeout : float or tuple, default 30
        Seconds to wait for the server, passed to ``requests.get`` so a
        stalled connection cannot hang the kernel.

    Returns
    -------
    dict
        The decoded JSON response body.

    Raises
    ------
    requests.HTTPError
        If the response status is 4xx/5xx (via ``raise_for_status``).
    requests.Timeout
        If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f"{BASE_URL}/locations",
        params={"city": city_name, "limit": 100},
        headers={"X-API-Key": API_KEY},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

def locations_to_df(data):
    """Flatten a locations response into one row per (location, sensor).

    Parameters
    ----------
    data : dict
        Decoded locations payload; locations are read from its ``"results"``
        list. A missing or null ``"results"`` is treated as no locations.

    Returns
    -------
    pandas.DataFrame
        Columns: location_id, location_name, locality, latitude, longitude,
        sensor_id, parameter, units. The columns are present even when there
        are no rows, so callers can filter on ``parameter`` unconditionally.
        Optional fields (name, locality, coordinates) that are absent become
        missing values instead of raising.

    Raises
    ------
    KeyError
        If a location lacks ``"id"`` or a sensor lacks ``"id"`` /
        ``"parameter"`` (these fields are treated as mandatory).
    """
    columns = [
        "location_id",
        "location_name",
        "locality",
        "latitude",
        "longitude",
        "sensor_id",
        "parameter",
        "units",
    ]
    rows = []
    # `or []` / `or {}` also cover keys that are present but explicitly null.
    for loc in data.get("results") or []:
        coords = loc.get("coordinates") or {}
        for sensor in loc.get("sensors") or []:
            rows.append({
                "location_id": loc["id"],
                "location_name": loc.get("name"),
                "locality": loc.get("locality"),
                "latitude": coords.get("latitude"),
                "longitude": coords.get("longitude"),
                "sensor_id": sensor["id"],
                "parameter": sensor["parameter"]["name"],
                "units": sensor["parameter"]["units"],
            })
    return pd.DataFrame(rows, columns=columns)

# Request locations with the "Riga" filter, flatten them to one row per
# sensor, then keep the PM2.5 sensors (case-insensitive name match).
# NOTE(review): the recorded output contains non-Riga locations, so the
# filter may not be applied server-side -- verify before using `pm25_riga`.
riga_data = locations_to_df(get_locations("Riga"))
pm25_riga = riga_data.loc[riga_data["parameter"].str.lower().eq("pm25")]

pm25_riga.shape, pm25_riga


((80, 8),
      location_id                                 location_name   locality  \
 1              3                                    NMA - Nima       None   
 3              4                                    NMT - Nima       None   
 5              5                               JTA - Jamestown       None   
 7              6                             ADT - Asylum Down       None   
 9              7                           ADEPA - Asylum Down       None   
 ..           ...                                           ...        ...   
 371          145                                  Escuela E-10       None   
 375          146                    Southwark A2 Old Kent Road     London   
 379          147                            Greenwich - Eltham  Greenwich   
 383          148                             London Bloomsbury     London   
 390          150  London Haringey Priory Park South - UKA00568     London   
 
       latitude  longitude  sensor_id parameter  uni

In [30]:
import pandas as pd

# Riga PM2.5 sensors from the already-downloaded Baltic locations table:
# country LV, parameter "pm25" (case-insensitive), locality containing
# "riga" (case-insensitive; rows without a locality never match),
# de-duplicated by sensor id.
riga_pm25 = (
    locations_df
    .loc[
        lambda d: d["country_code"].eq("LV")
        & d["parameter"].str.lower().eq("pm25")
        & d["locality"].str.contains("riga", case=False, na=False)
    ]
    .drop_duplicates(subset=["sensor_id"])
)

riga_pm25.shape, riga_pm25[["location_name", "locality", "latitude", "longitude", "sensor_id"]]


((1, 10),
                location_name locality   latitude  longitude  sensor_id
 70  Riga Kronvalda boulevard     Riga  56.954847  24.104756    1606344)