In [9]:
import time
import pandas as pd
from pathlib import Path
from openaq import OpenAQ
import requests
import json
import copy

In [10]:
# Read the API credentials from the local JSON file kept next to the notebook
# (keeps the key itself out of the notebook source).
credentials = json.loads(Path("credentials.json").read_text())

In [11]:

# Ask the OpenAQ v3 API for every location within `radius` of `coords`
# (first page only, up to `limit` entries, ordered by id) and cache the raw
# JSON on disk so later cells can be re-run without repeating the request.
headers = {"X-API-Key" : credentials['OPENAQ-API-KEY']}
coords = (41.88831156293874, -87.65155282542493)
radius = 24999
limit = 100
url = f"https://api.openaq.org/v3/locations?coordinates={coords[0]}%2C%20{coords[1]}&radius={radius}&limit={limit}&page=1&order_by=id&sort_order=asc"
loc_http_response = requests.get(url, headers=headers)
# Fail here on 4xx/5xx instead of caching an error payload and hitting a
# KeyError on 'results' in a later cell.
loc_http_response.raise_for_status()
loc_response = loc_http_response.json()

with open("Chicago_response.json", 'w') as f:
    json.dump(loc_response, f, indent=4)

In [12]:
# Display the raw locations response fetched in the previous cell.
loc_response

{'meta': {'name': 'openaq-api',
  'website': '/',
  'page': 1,
  'limit': 100,
  'found': 22},
 'results': [{'id': 424,
   'name': 'CHI_COM',
   'locality': 'Chicago-Naperville-Joliet',
   'timezone': 'America/Chicago',
   'country': {'id': 155, 'code': 'US', 'name': 'United States'},
   'owner': {'id': 4, 'name': 'Unknown Governmental Organization'},
   'provider': {'id': 119, 'name': 'AirNow'},
   'isMobile': False,
   'isMonitor': True,
   'instruments': [{'id': 2, 'name': 'Government Monitor'}],
   'sensors': [{'id': 737,
     'name': 'o3 ppm',
     'parameter': {'id': 10,
      'name': 'o3',
      'units': 'ppm',
      'displayName': 'O₃'}},
    {'id': 2933,
     'name': 'pm25 µg/m³',
     'parameter': {'id': 2,
      'name': 'pm25',
      'units': 'µg/m³',
      'displayName': 'PM2.5'}}],
   'coordinates': {'latitude': 41.7547, 'longitude': -87.7136},
   'licenses': [{'id': 33,
     'name': 'US Public Domain',
     'attribution': {'name': 'Unknown Governmental Organization', 'url

In [13]:

def get_data(sensor_id:int, datetime_from: str, datetime_to: str, location_name:str, latitude:str, longitude:str, country_name:str, total_api_calls:int):
    """Download hourly summary statistics for one sensor and save them as a CSV.

    Pages through ``/v3/sensors/{sensor_id}/hours`` for the requested window,
    collecting the period-end UTC timestamp and the summary statistics of
    every result. When at least one reading was found, the data is written to
    ``AQ_data/Chicago/<location_name>/<sensor_id>/<year>/<month>/<pollutant>_<sensor_id>.csv``.

    Args:
        sensor_id: Sensor id used in the request URL and in the output path.
        datetime_from: Start of the window as ``YYYY-MM-DD``; its first four
            characters name the year folder.
        datetime_to: End of the window as ``YYYY-MM-DD``; its month component
            names the month folder.
        location_name: Stored in the CSV and used as a folder name.
        latitude: Stored unchanged in the ``latitude`` column.
        longitude: Stored unchanged in the ``longitude`` column.
        country_name: Stored unchanged in the ``country`` column.
        total_api_calls: Running request counter supplied by the caller.

    Returns:
        ``total_api_calls`` incremented once per successful HTTP request.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    headers = {"X-API-Key" : credentials['OPENAQ-API-KEY']}
    stats = ['min', 'q02', 'q25', 'median', 'q75', 'q98', 'max', 'avg', 'sd']
    # Same column order as before: Timestamp first, then one column per stat.
    data_dict = {'Timestamp': []}
    data_dict.update({stat: [] for stat in stats})
    pollutant_name = None
    unit = None
    page = 1
    limit = 1000

    print(f"\nsensor_id: {sensor_id} | Fetching data from {datetime_from} to {datetime_to}")

    while True:
        url = f"https://api.openaq.org/v3/sensors/{sensor_id}/hours?datetime_to={datetime_to}&datetime_from={datetime_from}&limit={limit}&page={page}"

        readings = requests.get(url, headers=headers)
        readings.raise_for_status()

        total_api_calls += 1

        results = readings.json().get('results') or []

        if not results:
            if page == 1:
                print(f"No readings found from {datetime_from} to {datetime_to}")
                return total_api_calls
            # An empty page after page 1 means the previous page was exactly
            # full; stop paging but still save what was collected instead of
            # returning early and discarding it.
            print("Reached the end of data.")
            break

        if pollutant_name is None:
            pollutant_name = results[0]['parameter']['name']
            unit = results[0]['parameter']['units']

        data_dict['Timestamp'].extend(r['period']['datetimeTo']['utc'] for r in results)
        for stat in stats:
            # A missing stat becomes None so every column stays aligned with
            # the timestamps (unequal lengths would make pd.DataFrame raise).
            data_dict[stat].extend((r.get('summary') or {}).get(stat) for r in results)

        print(f"sensor_id: {sensor_id} | Fetched {len(results)} readings from page {page} | Total readings - {len(data_dict['Timestamp'])}")
        # A short page is the last page; judge by the number of results, not
        # by the length of whichever stat column was processed last.
        if len(results) < limit:
            print("Reached the end of data.")
            break

        page += 1

    print(f"Finished Fetching all data. Total readings are {len(data_dict['Timestamp'])} for sensor {sensor_id}")

    if data_dict['Timestamp']:
        data_df = pd.DataFrame(data_dict)
        data_df['pollutant'] = pollutant_name
        data_df['location_name'] = location_name
        data_df['latitude'] = latitude
        data_df['longitude'] = longitude
        data_df['unit'] = unit
        data_df['sensor_id'] = sensor_id
        data_df['city_name'] = "Chicago"
        data_df['state'] = "Illinois"
        data_df['country'] = country_name
        data_file_path = Path("AQ_data") / "Chicago" / location_name / str(sensor_id) / datetime_from[:4] / datetime_to.split("-")[1]
        data_file_path.mkdir(exist_ok=True, parents=True)
        file_name = data_file_path / (pollutant_name+"_"+str(sensor_id)+'.csv')
        data_df.to_csv(file_name, index=False)
        print(f"saved the data to {file_name}")

    return total_api_calls
    
    

In [14]:
# Running count of sensor-hours requests made through get_data; reset to 0
# whenever this cell is re-run.
TOTAL_API_CALLS = 0


def _fetch_year(sensor_id, year, location_name, latitude, longitude, country_name, total_api_calls):
    """Fetch one calendar year for a sensor as two half-year get_data calls and return the updated call count."""
    half_years = (
        (f"{year}-01-01", f"{year}-06-30"),
        (f"{year}-07-01", f"{year}-12-31"),
    )
    for datetime_from, datetime_to in half_years:
        total_api_calls = get_data(sensor_id=sensor_id,
                                   datetime_from=datetime_from,
                                   datetime_to=datetime_to,
                                   location_name=location_name,
                                   latitude=latitude,
                                   longitude=longitude,
                                   country_name=country_name,
                                   total_api_calls=total_api_calls)
    return total_api_calls


def get_sensor_data(location_id:int, location_name:str, latitude:float, longitude:float, country_name:str):
    """Download every available year of hourly data for all sensors at a location.

    Lists the location's sensors via ``/v3/locations/{location_id}/sensors``,
    keeps those that report ``coverage``, and for each year between a
    sensor's first and last reading (inclusive) fetches the data in two
    half-year windows through ``get_data``. A failed year is retried once
    after a 2-minute pause; a second failure propagates to the caller.

    Args:
        location_id: Location id used in the sensors request.
        location_name: Passed through to ``get_data``.
        latitude: Passed through to ``get_data``.
        longitude: Passed through to ``get_data``.
        country_name: Passed through to ``get_data``.

    Raises:
        requests.HTTPError: If the sensors listing request fails, or if a
            data request fails again on the retry.

    Side effects:
        Updates the module-level ``TOTAL_API_CALLS`` counter and sleeps
        between years to pace the requests.
    """
    global TOTAL_API_CALLS

    url = f"https://api.openaq.org/v3/locations/{location_id}/sensors"
    headers = {"X-API-Key" : credentials['OPENAQ-API-KEY']}
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    # Keep sensors that report coverage; also require first/last timestamps,
    # because subscripting a null value below would raise TypeError.
    sensors = [
        sensor for sensor in response.json()['results']
        if sensor.get('coverage') and sensor.get('datetimeFirst') and sensor.get('datetimeLast')
    ]

    start = time.time()
    for sensor in sensors:
        sensor_id = sensor['id']
        first_year = int(sensor['datetimeFirst']['utc'][:4])
        last_year = int(sensor['datetimeLast']['utc'][:4])

        for year in range(first_year, last_year + 1):
            # Snapshot so a failed attempt does not leave a partial count.
            calls_before = TOTAL_API_CALLS
            try:
                TOTAL_API_CALLS = _fetch_year(sensor_id, year, location_name, latitude,
                                              longitude, country_name, calls_before)
                stop = time.time()
                time.sleep(5)
                print(f"\ntime elapsed: {(stop-start):.4f} | {TOTAL_API_CALLS = }\n")
                # NOTE(review): `start` is never reset, so once 540 s have
                # elapsed this extra pause applies to every later year.
                if (stop-start) > 540:
                    time.sleep(40)

            except Exception as exc:
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # stops the run instead of triggering a 2-minute retry.
                print(f"Ran into an error ({exc!r}). Retrying after 2 minutes\n\n")
                time.sleep(120)
                TOTAL_API_CALLS = _fetch_year(sensor_id, year, location_name, latitude,
                                              longitude, country_name, calls_before)
                stop = time.time()
                time.sleep(30)
                print(f"time elapsed: {(stop-start):.4f} | {TOTAL_API_CALLS = }")

In [15]:
# Index of the first location to process.
# NOTE(review): presumably 8 resumes a run whose first eight locations were
# already downloaded — confirm; set to 0 to process every location.
START_INDEX = 8

for loc in loc_response['results'][START_INDEX:]:
    country_name = loc['country']['name']
    coordinates = loc.get('coordinates') or {}
    # Plain .get() so a legitimate 0.0 coordinate is kept; the previous
    # truthiness check turned 0.0 into None.
    latitude = coordinates.get('latitude')
    longitude = coordinates.get('longitude')
    get_sensor_data(location_id=loc['id'], location_name=loc['name'], latitude=latitude, longitude=longitude, country_name=country_name)


sensor_id: 6664822 | Fetching data from 2023-01-01 to 2023-06-30
No readings found from 2023-01-01 to 2023-06-30

sensor_id: 6664822 | Fetching data from 2023-07-01 to 2023-12-31
sensor_id: 6664822 | Fetched 216 readings from page 1 | Total readings - 216
Reached the end of data.
Finished Fetching all data. Total readings are 216 for sensor 6664822
saved the data to AQ_data\Chicago\West Albany Park, Chicago\6664822\2023\12\temperature_6664822.csv

time elapsed: 0.6238 | TOTAL_API_CALLS = 2


sensor_id: 6664822 | Fetching data from 2024-01-01 to 2024-06-30
sensor_id: 6664822 | Fetched 1000 readings from page 1 | Total readings - 1000
sensor_id: 6664822 | Fetched 1000 readings from page 2 | Total readings - 2000
sensor_id: 6664822 | Fetched 420 readings from page 3 | Total readings - 2420
Reached the end of data.
Finished Fetching all data. Total readings are 2420 for sensor 6664822
saved the data to AQ_data\Chicago\West Albany Park, Chicago\6664822\2024\06\temperature_6664822.csv

sens

In [8]:
# Display the last location dict bound to `loc` by the download loop.
loc 

{'id': 1370216,
 'name': 'West Albany Park, Chicago',
 'locality': None,
 'timezone': 'America/Chicago',
 'country': {'id': 155, 'code': 'US', 'name': 'United States'},
 'owner': {'id': 4, 'name': 'Unknown Governmental Organization'},
 'provider': {'id': 66, 'name': 'AirGradient'},
 'isMobile': False,
 'isMonitor': True,
 'instruments': [{'id': 2, 'name': 'Government Monitor'},
  {'id': 2, 'name': 'Government Monitor'}],
 'sensors': [{'id': 7979159,
   'name': 'pm1 µg/m³',
   'parameter': {'id': 19,
    'name': 'pm1',
    'units': 'µg/m³',
    'displayName': 'PM1'}},
  {'id': 7971260,
   'name': 'pm1 µg/m³',
   'parameter': {'id': 19,
    'name': 'pm1',
    'units': 'µg/m³',
    'displayName': 'PM1'}},
  {'id': 7971006,
   'name': 'pm10 µg/m³',
   'parameter': {'id': 1,
    'name': 'pm10',
    'units': 'µg/m³',
    'displayName': 'PM10'}},
  {'id': 7979145,
   'name': 'pm10 µg/m³',
   'parameter': {'id': 1,
    'name': 'pm10',
    'units': 'µg/m³',
    'displayName': 'PM10'}},
  {'id':

In [9]:
import pandas as pd

# Load one saved CSV from the Chicago output tree and one from the Bangalore
# tree so their column layouts can be compared in the next cell.
df1 = pd.read_csv("./AQ_data/Chicago/CHI_COM/737/2020/12/o3_737.csv")
df2 = pd.read_csv("./AQ_data/Bangalore/BTM Layout, Bengaluru - CPCB/14635/2018/06/pm25_14635.csv")

In [10]:
# Element-wise comparison of the two column indexes: each True means the
# column names match at that position (pandas raises if the lengths differ).
df1.columns == df2.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [1]:
# Sample name containing a colon and spaces.
# NOTE(review): presumably a location name being tested for use as a folder name — confirm.
some_path = "US Diplomatic Post: New Delhi"

In [None]:
# Replace every colon and space with an underscore; the result is only displayed, not stored.
some_path.replace(":", "_").replace(" ", "_")