In [6]:
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from openaq import OpenAQ
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# OpenAQ API key; this is None when AQAPI_KEY is not set in the environment.
API_KEY = os.environ.get("AQAPI_KEY")


# Parameters
# NOTE(review): RADIUS, LIMIT and DATE_RANGE are not referenced by the cells
# visible here -- confirm whether they are still needed.
RADIUS = 10000
LIMIT = 3
DATE_RANGE = [
    datetime.now() - timedelta(days=1),
    datetime.now(),
]


# Initialize the OpenAQ client (shared by the functions below as a global)
client = OpenAQ(api_key=API_KEY)


def _list_measurements_with_retry(sensor_id):
    """Fetch measurements for one sensor, retrying once; return None on failure."""
    # The exception types raised by the SDK are not visible from this file, so
    # the broad catch is kept on purpose: one bad sensor must not abort the run.
    try:
        return client.measurements.list(sensor_id)
    except Exception:
        print(f"\nFirst attempt failed for sensor {sensor_id}, retrying once...")

    try:
        return client.measurements.list(sensor_id)
    except Exception as e:
        print(f"\nSkipping sensor {sensor_id} due to error: {str(e)}")
        return None


# Function to fetch measurement data for every sensor near a given location
def fetch_nearby_sensors(latitude, longitude, radius=10_000, limit=100):
    """Return a DataFrame of measurements from sensors near a coordinate.

    Uses the module-level ``client`` to list locations around
    ``(latitude, longitude)`` and then, for each sensor at each location,
    requests its measurements. A sensor whose request fails twice is skipped.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        radius: Search radius passed to ``client.locations.list``.
        limit: Maximum number of locations passed to ``client.locations.list``.

    Returns:
        A DataFrame indexed by ``unique_id``
        (``"{location_id}_{sensor_id}_{measurement_id}"``) with one row per
        measurement and the columns ``unique_id``, ``measurement_id``,
        ``sensor_id``, ``location_id``, ``location``, ``latitude``,
        ``longitude``, ``epoch``, ``duration``, ``parameter``, ``value`` and
        ``units``. The frame is empty when nothing could be fetched.
    """
    # Get the locations near the given coordinates
    response = client.locations.list(coordinates=(latitude, longitude), radius=radius, limit=limit)

    format_string = "%Y-%m-%dT%H:%M:%SZ"
    data = {}

    total_locations = len(response.results)

    # For each location in the response, fetch its sensors
    for current_location, location in enumerate(response.results, start=1):
        sensors = location.sensors
        if not sensors:
            # Nothing to query for this location.
            continue

        total_sensors = len(sensors)
        lat = location.coordinates.latitude
        long = location.coordinates.longitude
        loc_name = location.name
        location_id = location.id

        for current_sensor, sensor in enumerate(sensors, start=1):
            print(f"Processing: Location {current_location}/{total_locations}, Sensor {current_sensor}/{total_sensors}", end="\r")
            sensor_id = sensor.id

            measurements = _list_measurements_with_retry(sensor_id)
            if measurements is None:
                continue

            # For each measurement, record the relevant data
            for m_id, measurement in enumerate(measurements.results, start=1):
                epoch = datetime.strptime(measurement.period.datetime_from.utc, format_string)
                # total_seconds() keeps the day component; `.seconds` is only
                # the sub-day remainder and would turn a 24h interval into 0.
                duration = timedelta(seconds=pd.to_timedelta(measurement.period.interval).total_seconds())

                unique_id = f"{location_id}_{sensor_id}_{m_id}"

                data[unique_id] = {
                    "unique_id": unique_id,
                    "measurement_id": m_id,
                    "sensor_id": sensor_id,
                    "location_id": location_id,
                    "location": loc_name,
                    "latitude": lat,
                    "longitude": long,
                    "epoch": epoch,
                    "duration": duration,
                    "parameter": measurement.parameter.name,
                    "value": measurement.value,
                    "units": measurement.parameter.units,
                }

    if total_locations:
        # Terminate the carriage-return progress line so that whatever is
        # printed next does not get glued onto it.
        print()

    return pd.DataFrame.from_dict(data, orient="index")


# Example run: sensors within 10km of Los Angeles (34.0549, -118.2426),
# relying on the function's default radius and location limit.
df = fetch_nearby_sensors(latitude=34.0549, longitude=-118.2426)

# The API client is left open in this cell; the close call stays disabled.
# client.close()

# Report the (rows, columns) size of the result
print(df.shape)

# Show the first rows as the cell's output
df.head()


Processing: Location 22/73, Sensor 1/6

HTTP 500 - {"message":"Internal Server Error"}
NoneType: None



First attempt failed for sensor 2000952, retrying once...
Processing: Location 49/73, Sensor 2/6

HTTP 500 - {"message":"Internal Server Error"}
NoneType: None



First attempt failed for sensor 2000894, retrying once...
Processing: Location 52/73, Sensor 5/5

HTTP 500 - {"message":"Internal Server Error"}
NoneType: None



First attempt failed for sensor 5244463, retrying once...
Processing: Location 70/73, Sensor 4/6

HTTP 500 - {"message":"Internal Server Error"}
NoneType: None



First attempt failed for sensor 7526859, retrying once...
(368614, 12)Location 73/73, Sensor 1/1


Unnamed: 0,unique_id,measurement_id,sensor_id,location_id,location,latitude,longitude,epoch,duration,parameter,value,units
1575_8681_1,1575_8681_1,1,8681,1575,Los Angeles - N. Mai,34.0669,-118.2417,2016-11-15 20:00:00,0 days 01:00:00,co,0.71,ppm
1575_8681_2,1575_8681_2,2,8681,1575,Los Angeles - N. Mai,34.0669,-118.2417,2016-11-16 00:00:00,0 days 01:00:00,co,0.55,ppm
1575_8681_3,1575_8681_3,3,8681,1575,Los Angeles - N. Mai,34.0669,-118.2417,2016-11-16 01:00:00,0 days 01:00:00,co,0.57,ppm
1575_8681_4,1575_8681_4,4,8681,1575,Los Angeles - N. Mai,34.0669,-118.2417,2016-11-16 02:00:00,0 days 01:00:00,co,0.54,ppm
1575_8681_5,1575_8681_5,5,8681,1575,Los Angeles - N. Mai,34.0669,-118.2417,2016-11-16 03:00:00,0 days 01:00:00,co,0.43,ppm


In [None]:
# Save the fetched measurements. Create the output directory first so the
# write cannot fail with an OSError just because "data/" is missing.
os.makedirs("data", exist_ok=True)
df.to_csv("data/measurements_10km_la.csv", index=False)
