In [1]:
import os
from getpass import getpass

import requests

# SECURITY: the API key must never be stored in the notebook. It is read from
# the OPENAQ_API_KEY environment variable, falling back to an interactive
# prompt. Any key that was previously committed here should be revoked.
OPENAQ_API_KEY = os.environ.get("OPENAQ_API_KEY") or getpass("OpenAQ API key: ")

base_url = "https://api.openaq.org/v3/sensors/{}/hours".format(1616)
headers = {
    'accept': 'application/json',
    'X-API-Key': OPENAQ_API_KEY
}

all_results = []
page = 1
total_found = None

params = {
    'limit': 1000,  # Using max limit
    'page': page
}

# A timeout keeps the kernel from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# KeyError when the body is inspected in a later cell.
response = requests.get(base_url, headers=headers, params=params, timeout=60)
response.raise_for_status()

In [5]:
# Decode the JSON body of the response fetched in the previous cell, then
# display the list of hourly records it contains.
data = response.json()
data['results']

[{'value': 0.049,
  'flagInfo': {'hasFlags': False},
  'parameter': {'id': 10, 'name': 'o3', 'units': 'ppm', 'displayName': None},
  'period': {'label': '1hour',
   'interval': '01:00:00',
   'datetimeFrom': {'utc': '2016-04-06T17:00:00Z',
    'local': '2016-04-06T10:00:00-07:00'},
   'datetimeTo': {'utc': '2016-04-06T18:00:00Z',
    'local': '2016-04-06T11:00:00-07:00'}},
  'coordinates': None,
  'summary': {'min': 0.049,
   'q02': 0.049,
   'q25': 0.049,
   'median': 0.049,
   'q75': 0.049,
   'q98': 0.049,
   'max': 0.049,
   'avg': 0.049,
   'sd': None},
  'coverage': {'expectedCount': 1,
   'expectedInterval': '01:00:00',
   'observedCount': 1,
   'observedInterval': '01:00:00',
   'percentComplete': 100.0,
   'percentCoverage': 100.0,
   'datetimeFrom': {'utc': '2016-04-06T18:00:00Z',
    'local': '2016-04-06T11:00:00-07:00'},
   'datetimeTo': {'utc': '2016-04-06T18:00:00Z',
    'local': '2016-04-06T11:00:00-07:00'}}},
 {'value': 0.052,
  'flagInfo': {'hasFlags': False},
  'param

In [9]:
# NOTE(review): `found` appears to be either an int or a string such as
# '>1000' (the generator below branches on isinstance(..., int)). Normalising
# with str() keeps this membership test from raising TypeError on an int.
'>' in str(data['meta']['found'])

True

In [5]:
# Sanity check of the "no more pages" condition: a '>' marker is present, so
# this evaluates to False.
'>' not in str('>99')

False

In [9]:
import requests
import pandas as pd
from time import sleep
from requests.exceptions import RequestException

def get_sensor_data_generator(sensor_id, api_key, max_retries=3, retry_delay=60, timeout=60):
    """Yield hourly data for one sensor, one API page at a time.

    Each yielded item is a list of ``(utc_timestamp, value)`` tuples taken from
    ``period.datetimeTo.utc`` and ``value`` of every record on the page.

    Args:
        sensor_id: Sensor id interpolated into the ``/v3/sensors/{id}/hours`` URL.
        api_key: Value sent in the ``X-API-Key`` header.
        max_retries: Attempts per page before giving up (at least one attempt
            is always made).
        retry_delay: Seconds to sleep between failed attempts and between
            consecutive page requests.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            fails (and is retried) instead of hanging forever.

    Yields:
        list[tuple]: The ``(timestamp, value)`` pairs of the current page.

    Notes:
        If a page still fails after all attempts, a message is printed and the
        generator simply stops; the caller then only has the pages yielded so far.
    """
    base_url = f"https://api.openaq.org/v3/sensors/{sensor_id}/hours"
    headers = {
        'accept': 'application/json',
        'X-API-Key': api_key
    }

    page_size = 1000
    page = 1
    # Must start at 0: it used to be read before assignment, which raised
    # UnboundLocalError whenever the first page was also the last one.
    records_so_far = 0
    # Guarantee at least one attempt so `response` is always bound.
    attempts = max(1, max_retries)

    while True:
        params = {
            'limit': page_size,
            'page': page
        }

        # Retry transient request failures a bounded number of times.
        for retry in range(attempts):
            try:
                response = requests.get(
                    base_url, headers=headers, params=params, timeout=timeout
                )
                response.raise_for_status()
                break  # Success, leave the retry loop
            except RequestException as e:
                if retry == attempts - 1:  # Last attempt
                    print(f"Failed after {attempts} attempts on page {page}. Error: {e}")
                    return  # Exit generator
                print(f"Attempt {retry + 1} failed. Retrying in {retry_delay} seconds...")
                sleep(retry_delay)

        data = response.json()

        found_records_in_curr_page = data['meta']['found']
        print(f"Total records found: {found_records_in_curr_page} in page {page}.")

        # Create pairs of timestamp and value
        data_pairs = [
            (result['period']['datetimeTo']['utc'], result['value'])
            for result in data['results']
        ]
        yield data_pairs

        # Count what was actually received instead of trusting `found`, which
        # may be a string marker such as '>1000'.
        records_so_far += len(data_pairs)
        print(f"Processed page {page}, records so far: {records_so_far}")

        # A '>' marker in `found` signals that more pages exist. An empty page
        # also ends the run so a stuck marker cannot loop forever.
        if '>' not in str(found_records_in_curr_page) or not data_pairs:
            break

        page += 1
        # Pause only when another request will follow; the final page no
        # longer costs a pointless wait.
        print(f"Waiting {retry_delay} seconds before next request...")
        sleep(retry_delay)

def process_sensor_data(sensor_id, pollutant_name, api_key):
    """Collect every page for one sensor and persist the series as a CSV file.

    Pages are pulled from ``get_sensor_data_generator`` and concatenated into a
    single list of ``(timestamp, value)`` pairs. The pairs become a DataFrame
    indexed by parsed, sorted timestamps with one column named after the
    pollutant, which is written to ``mojave_<pollutant>_<sensor_id>_data.csv``
    in the current working directory.

    Args:
        sensor_id: Sensor id forwarded to the generator and used in the file name.
        pollutant_name: Column name for the values; lower-cased in the file name.
        api_key: API key forwarded to the generator.

    Returns:
        The sorted DataFrame, or ``None`` when no data points were collected.
    """
    print(f"\nFetching data for {pollutant_name} (Sensor ID: {sensor_id})...")

    collected = []
    for page_pairs in get_sensor_data_generator(sensor_id, api_key):
        # Defensive guard: a None page is treated as an aborted download.
        if page_pairs is None:
            print(f"Failed to fetch complete data for {pollutant_name}")
            break
        collected += page_pairs
        print(f"Current data points collected: {len(collected)}")

    if len(collected) == 0:
        print(f"No data collected for {pollutant_name}")
        return None

    # Build the frame, move the timestamps into the index, parse and order them.
    frame = pd.DataFrame(collected, columns=['timestamp', pollutant_name])
    frame = frame.set_index('timestamp')
    frame.index = pd.to_datetime(frame.index)
    frame = frame.sort_index()

    csv_filename = f"mojave_{pollutant_name.lower()}_{sensor_id}_data.csv"
    frame.to_csv(csv_filename)
    print(f"Data saved to {csv_filename} with {len(frame)} records")

    return frame

def get_all_pollutant_data(api_key):
    """Run ``process_sensor_data`` for each configured pollutant sensor.

    Args:
        api_key: API key forwarded to every per-sensor download.

    Returns:
        dict mapping pollutant name ('O3', 'PM10', 'PM2.5') to whatever
        ``process_sensor_data`` returned for that sensor.
    """
    sensor_ids_by_pollutant = {
        'O3': 3796234,
        'PM10': 3796235,
        'PM2.5': 3796233,
    }

    # Sensors are processed one after another, in the order listed above.
    return {
        name: process_sensor_data(sensor_id, name, api_key)
        for name, sensor_id in sensor_ids_by_pollutant.items()
    }

# Main execution
if __name__ == "__main__":
    import os
    from getpass import getpass

    # SECURITY: never hardcode the API key in the notebook. Read it from the
    # OPENAQ_API_KEY environment variable, or prompt for it interactively.
    # Any key that was previously committed here should be revoked.
    api_key = os.environ.get("OPENAQ_API_KEY") or getpass("OpenAQ API key: ")
    pollutant_data = get_all_pollutant_data(api_key)


Fetching data for O3 (Sensor ID: 3796234)...
Total records found: >1000 in page 1.
Current data points collected: 1000
Processed page 1, records so far: 1000
Waiting 60 seconds before next request...
Total records found: >1000 in page 2.
Current data points collected: 2000
Processed page 2, records so far: 2000
Waiting 60 seconds before next request...
Total records found: >1000 in page 3.
Current data points collected: 3000
Processed page 3, records so far: 3000
Waiting 60 seconds before next request...
Total records found: >1000 in page 4.
Current data points collected: 4000
Processed page 4, records so far: 4000
Waiting 60 seconds before next request...
Total records found: >1000 in page 5.
Current data points collected: 5000
Processed page 5, records so far: 5000
Waiting 60 seconds before next request...
Total records found: >1000 in page 6.
Current data points collected: 6000
Processed page 6, records so far: 6000
Waiting 60 seconds before next request...
Total records found: >10