In [1]:
!pip install ratelimit tenacity openaq tqdm

StatementMeta(, b79deaf0-91ee-4687-b8f0-47141871b879, 3, Finished, Available, Finished)

Collecting ratelimit
  Downloading ratelimit-2.2.1.tar.gz (5.3 kB)
  Preparing metadata (setup.py) ... [?25l- \ done
Collecting openaq
  Downloading openaq-0.6.0-py3-none-any.whl.metadata (4.5 kB)
Collecting httpx<1.0,>=0.28.1 (from openaq)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting httpcore==1.* (from httpx<1.0,>=0.28.1->openaq)
  Downloading httpcore-1.0.9-py3-none-any.whl.metadata (21 kB)
Collecting h11>=0.16 (from httpcore==1.*->httpx<1.0,>=0.28.1->openaq)
  Downloading h11-0.16.0-py3-none-any.whl.metadata (8.3 kB)
Downloading openaq-0.6.0-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.5/61.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.28.1-py3-none-any.whl (73 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.5/73.5 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.9-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import json
from datetime import datetime, timedelta,timezone
from openaq import OpenAQ
import time
from threading import Lock
from tqdm import tqdm
import fsspec
from ratelimit import limits, sleep_and_retry
from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception
from requests.exceptions import HTTPError

StatementMeta(, b79deaf0-91ee-4687-b8f0-47141871b879, 4, Finished, Available, Finished)

In [3]:
# Read the OpenAQ API key from the environment instead of embedding it in the
# notebook, so the secret is not stored in source control or shared outputs.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
openaq_api_key = os.environ.get("OPENAQ_API_KEY")
if not openaq_api_key:
    raise RuntimeError(
        "Set the OPENAQ_API_KEY environment variable before running this notebook."
    )
client = OpenAQ(api_key=openaq_api_key)

# Storage location: the sensor state file and the hourly measurement files are
# read from / written to paths below `abfss_path`.
account_name = "4906b11e-1e59-4869-9321-062a4696a2db"
account_host = "onelake.dfs.fabric.microsoft.com"
abfss_path = f"abfss://{account_name}@{account_host}/62794233-3c68-4109-ab1e-7666b1963827/Files/aq"

# fsspec filesystem handle used for every read and write in this notebook.
fs = fsspec.filesystem(
    "abfss",
    account_name=account_name,
    account_host=account_host,
)

StatementMeta(, b79deaf0-91ee-4687-b8f0-47141871b879, 5, Finished, Available, Finished)

In [4]:
# Load the per-sensor state file; its keys are the sensor ids to process and
# each entry carries the "last_fetch" timestamp read by the fetch step.
with fs.open(f"{abfss_path}/sensors/sensors.json", "r") as sensors_file:
    data = json.loads(sensors_file.read())

# Root folder under which the hourly measurement files are written.
data_base = f"{abfss_path}/hourly"
sensors_list = list(map(int, data))
def to_dict(obj):
    """Recursively convert an object graph into JSON-friendly containers.

    Lists and dicts are rebuilt with every element/value converted, objects
    that expose ``__dict__`` become a dict of their (converted) attributes,
    and anything else is returned unchanged.

    Args:
        obj: A list, dict, attribute-bearing object, or plain value.

    Returns:
        The converted structure; plain values are returned as-is.
    """
    if isinstance(obj, list):
        return [to_dict(item) for item in obj]
    if isinstance(obj, dict):
        # Walk into dict values too, otherwise objects nested inside a dict
        # would be returned unconverted and fail JSON serialisation later.
        return {key: to_dict(value) for key, value in obj.items()}
    if hasattr(obj, "__dict__"):
        return {name: to_dict(value) for name, value in obj.__dict__.items()}
    return obj

StatementMeta(, b79deaf0-91ee-4687-b8f0-47141871b879, 6, Finished, Available, Finished)

In [5]:

# Client-side throttle settings: the decorator below allows at most `calls`
# invocations per `periods` seconds, and every call is additionally followed
# by a pause of `call_intervall` seconds.
calls = 60
periods = 60
call_intervall = 1.1


@sleep_and_retry
@limits(calls=calls, period=periods)
def safe_measurements_list(**kwargs):
    """Fetch one page of measurements while respecting the local rate limit.

    All keyword arguments are forwarded unchanged to
    ``client.measurements.list``.

    Returns:
        The ``results`` attribute of the response.
    """
    response = client.measurements.list(**kwargs)
    measurements = response.results
    # Pause after the request so consecutive calls are spaced out.
    time.sleep(call_intervall)
    return measurements

def is_rate_limit_error(e):
    """Return True when ``e`` carries an HTTP 429 (Too Many Requests) status.

    The status is looked up on the exception itself (``e.status_code``) and on
    an attached response object (``e.response.status_code``), so the check does
    not depend on one HTTP library's exception class and tolerates exceptions
    whose ``response`` is missing or ``None``.

    NOTE(review): the openaq package is installed with httpx as its HTTP
    dependency, so it presumably never raises ``requests`` exceptions — confirm
    which attribute its rate-limit error exposes.

    Args:
        e: The exception raised by the wrapped call.

    Returns:
        bool: True if a 429 status was found, False otherwise.
    """
    response = getattr(e, "response", None)
    status_candidates = (
        getattr(e, "status_code", None),
        getattr(response, "status_code", None),
    )
    return 429 in status_candidates

@retry(
    retry=retry_if_exception(is_rate_limit_error),
    wait=wait_exponential(multiplier=1, min=1, max=60),
    stop=stop_after_attempt(5),
    # Surface the original exception once attempts are exhausted instead of a
    # tenacity RetryError wrapper, so the caller's failure message names the
    # real cause.
    reraise=True,
)
def safe_measurements_retry(**kwargs):
    """Call ``safe_measurements_list`` and retry it on rate-limit errors.

    Retries happen only for exceptions accepted by ``is_rate_limit_error``,
    with exponential back-off between 1 and 60 seconds and at most 5 attempts.
    Any other exception propagates immediately.

    Keyword arguments are forwarded unchanged.

    Returns:
        Whatever ``safe_measurements_list`` returns.
    """
    return safe_measurements_list(**kwargs)

def _parse_iso_utc(value):
    """Parse an ISO-8601 string, accepting a trailing "Z" and treating naive values as UTC."""
    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # Naive and aware datetimes cannot be compared; the timestamps this
        # notebook writes back are derived from UTC fields, so default to UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def fetch_sensor_data(sensor_id, position=0):
    """Download new hourly measurements for one sensor and store them as JSON.

    Starting at midnight of the sensor's stored ``last_fetch`` timestamp, the
    range up to the sensor's ``datetime_last`` is walked in windows of at most
    30 days. Each window is paged through (500 rows per page) and every
    measurement is written to
    ``{data_base}/YYYY/MM/DD/HH/{sensor_id}/sensor_{sensor_id}.json``, where
    the date parts come from the measurement's local start time. After each
    page the in-memory ``last_fetch`` is advanced; once all windows are done
    the whole ``data`` mapping is written back to ``sensors/sensors.json``.

    Args:
        sensor_id: Id of the sensor; ``str(sensor_id)`` must be a key of ``data``.
        position: Not used by this function; kept for call compatibility.

    Returns:
        str: ``"Finished sensor <id>"`` on success, otherwise
        ``"Sensor <id> failed: <error>"``. Exceptions derived from
        ``Exception`` are caught and reported through the return value.
    """
    try:
        sensor_info = client.sensors.get(sensor_id).results[0]
        sensor_key = str(sensor_id)
        # Restart from the beginning of the day of the last recorded fetch.
        current_from = _parse_iso_utc(data[sensor_key]["last_fetch"]).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        datetime_last = _parse_iso_utc(sensor_info.datetime_last["utc"])

        while current_from < datetime_last:
            # Query at most 30 days per window, never past the last known reading.
            current_to = min(current_from + timedelta(days=30), datetime_last)
            page = 1
            while True:
                measurements = safe_measurements_retry(
                    sensors_id=sensor_id,
                    datetime_from=current_from.isoformat(),
                    datetime_to=current_to.isoformat(),
                    page=page,
                    limit=500,
                    data="hours",
                )

                # An empty page marks the end of this window.
                if not measurements:
                    break

                for m in measurements:
                    # Files are partitioned by the measurement's local start time.
                    # NOTE(review): two distinct hours that share the same local
                    # wall-clock hour would map to the same file — confirm this
                    # is acceptable.
                    dt_local = datetime.fromisoformat(m.period.datetime_from.local)
                    folder_path = f"{data_base}/{dt_local.year}/{dt_local.month:02d}/{dt_local.day:02d}/{dt_local.hour:02d}/{sensor_id}"
                    fs.mkdirs(folder_path, exist_ok=True)
                    file_path = f"{folder_path}/sensor_{sensor_id}.json"
                    with fs.open(file_path, "w") as f:
                        json.dump(to_dict(m), f, indent=2)

                # Remember the latest period end seen on this page.
                last_dt = max(_parse_iso_utc(m.period.datetime_to.utc) for m in measurements)
                data[sensor_key]["last_fetch"] = last_dt.isoformat()
                page += 1

            current_from = current_to

        # Persist the updated state for all sensors.
        with fs.open(f"{abfss_path}/sensors/sensors.json", "w") as f:
            json.dump(data, f, indent=2)

        return f"Finished sensor {sensor_id}"
    except Exception as e:
        # Best effort: report the failure so the remaining sensors still run.
        return f"Sensor {sensor_id} failed: {e}"


# Process the sensors one after another and print each status message as soon
# as that sensor is done (map is lazy, so fetches and prints interleave).
for result in map(fetch_sensor_data, sensors_list):
    print(result)

# Release the API client once every sensor has been handled.
client.close()

StatementMeta(, b79deaf0-91ee-4687-b8f0-47141871b879, 7, Finished, Available, Finished)

Finished sensor 673
Finished sensor 1097
Finished sensor 1102
Finished sensor 1103
Finished sensor 1152
Finished sensor 1109
Finished sensor 1156
Finished sensor 5077821
Finished sensor 1145
Finished sensor 1146
Finished sensor 1534
Finished sensor 1535
Finished sensor 1536
Finished sensor 4272431
Finished sensor 1693
Finished sensor 5077566
Finished sensor 1758
Finished sensor 3951
Finished sensor 3952
Finished sensor 4272273
Finished sensor 1769
Finished sensor 1783
Finished sensor 3778
Finished sensor 4272366
Finished sensor 23341
Finished sensor 2016
Finished sensor 4272352
Finished sensor 2644
Finished sensor 4272224
Finished sensor 3238
Finished sensor 4272441
Finished sensor 7631564
Finished sensor 25520
Finished sensor 1322891
Finished sensor 1662910
Finished sensor 2165515
Finished sensor 2189530


In [6]:
# NOTE(review): looks like a leftover scratch cell (prints the literal 1) — consider removing.
print(1)

StatementMeta(, b79deaf0-91ee-4687-b8f0-47141871b879, 8, Finished, Available, Finished)

1
