In [5]:
import json
import requests
import pandas as pd
import time  # Added for delay
from datetime import datetime, timezone, timedelta

# Load location IDs and names
with open("../locations.json", "r") as f:
    locations = json.load(f)

# Load API keys from secrets.json
with open("../secrets.json", "r") as f:
    secrets = json.load(f)

api_keys = list(secrets.values())

# Load sensor parameter data from a local JSON file
with open("../all_sensors_data.json", "r") as f:
    sensor_parameters_data = json.load(f)

api_keys=['007c6ade6a3835eddf41651b9dfff459cac26e760db15cabca670837c099e21a',
 '6f6c13afd19d615c13ca0db64d397df609db7843bec9cd813d1645e71fc99b3b',
 'dfebfc9f4dc60ac457ba5f421b68f9f73f72eee9f6e4b7f14d77174716af5b10',
 'b9b63a5d19bad4cd7b3baa4da31b11b0a646b137e3e2e81c3f97b4b47d864407',
 '363ee648f72bcff8c689e03bf485ed7edb0e4467c300aceeaf73ac94d056ba17',
 'a437e50d9065c8504dbb3b8f907debba4dcfaf10c7a417c4646d801f8ec7533a',
 'c3a4703bf1e57d93c8af8eb3041e2aa27016a8acb768ee63007a394d1221a898',
 '9f05a3e562ab8e347efeb3f25ecfdea31daf75ca8c27e3e19a9d498290dd8f58',
 '22444e8d29ac1096ae9f66a1d115e5941098bcc5bdfcfd4cef9e1fe2050ceecd',
 '8f6121ec47a0179e07c604eeddb69636208a24f40fb64d0f0a25bfeecff589ce',
 '3c7d6c5bba888aadb797f3013b37920b86d2a84196041abfbd4fe372a2408408']

# Create a dictionary for quick lookup by sensor_id
sensor_parameters = {}
for location in sensor_parameters_data:
    for sensor in location['sensors']:
        sensor_parameters[sensor['sensorid']] = {
            'parameter_name': sensor['parameter_name'],
            'units': sensor['units'],
            'display_name': sensor['display_name']
        }

# Function to get headers for each API key
def get_headers(api_key):
    return {
        "accept": "application/json",
        "X-API-Key": api_key
    }

# List to store all sensor data
all_sensor_data = []
# Get today's UTC date
today_utc = datetime.now(timezone.utc).date()
yesterday_utc = today_utc - timedelta(days=1)

# Fetch data for each location using multiple API keys
api_index = 0  # Track current API key index

for loc_id, loc_name in locations.items():
    headers = get_headers(api_keys[api_index % len(api_keys)])  # Cycle through API keys
    api_index += 1  # Move to the next API key for the next request
    
    url = f"https://api.openaq.org/v3/locations/{loc_id}/latest"
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        data = response.json()
        
        for result in data.get('results', []):
            # Extract UTC datetime
            utc_str = result['datetime']['utc']
            utc_time = datetime.fromisoformat(utc_str.replace('Z', '+00:00')).replace(tzinfo=timezone.utc)
            utc_date = utc_time.date()

            
            # Filter for today's data
            if utc_date == today_utc or utc_date==yesterday_utc:
                # Extract sensor_id from the result
                sensor_id = result.get('sensorsId', 'N/A')  # Verify key name in API response
                
                # Look up sensor parameters from the local JSON data
                parameter_info = sensor_parameters.get(sensor_id, {})
                parameter_name = parameter_info.get('parameter_name', 'unknown')
                units = parameter_info.get('units', 'N/A')
                display_name = parameter_info.get('display_name', 'N/A')
                
                # Append to list
                all_sensor_data.append({
                    'location_id': loc_id,
                    'location_name': loc_name,
                    'datetime_utc': utc_str,
                    'parameter': parameter_name,
                    'value': result['value'],
                    'unit': units,
                    'sensor_id': sensor_id,
                    'latitude': result['coordinates']['latitude'],
                    'longitude': result['coordinates']['longitude'],
                    'display_name': display_name
                })
        
        time.sleep(1 / len(api_keys))  # Adjust delay to distribute load across keys
    
    except requests.exceptions.RequestException as e:
        print(f"Error for location {loc_id}: {e}")
        time.sleep(1)  # Sleep even if request fails

# Create DataFrame
df = pd.DataFrame(all_sensor_data)
df = df[~df["parameter"].isin(["unknown", "um003", "pm1"])]
print(f"Found {len(df)} sensor readings for today ({today_utc})")

Found 4160 sensor readings for today (2025-04-18)


In [6]:
df_loc = df[['location_id', 'location_name', 'latitude', 'longitude']].drop_duplicates()
df_loc['location_id'] = df_loc['location_id'].astype(int)

# Convert 'location_name' to string (char)
df_loc['location_name'] = df_loc['location_name'].astype(str)
df_loc.to_csv('../locations.csv',index=False)