In [1]:
import pandas as pd
import geopandas as gpd

from pyathena import connect
from pyathena.error import OperationalError
import datetime as dt

import requests

https://docs.openaq.org/aws/athena-guide

```
s3://openaq-data-archive/records/csv.gz/
├─ locationid=2178/
│  ├─ year=2022/
│  │  ├─ month=05/
│  │  │  └─ location-2178-20220503.csv.gz
│  │  └─ month=06/...
│  └─ year=2023/...
└─ locationid=827/...
```

Example file path:

`/records/csv.gz/locationid=2178/year=2022/month=05/location-2178-20220503.csv.gz`

In [48]:
la_sensors_gdf = gpd.read_file("../../Data/sensor_data/la_openaq_stations.geojson")

In [88]:
la_sensors_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [56]:
# Rename station_id -> location_id so the column name matches the OpenAQ archive schema.
# NOTE(review): this mutates la_sensors_gdf in place, and cells further down still read
# la_sensors_gdf['station_id']; those raise KeyError once this cell has run.
la_sensors_gdf.rename(columns = {'station_id':'location_id'}, inplace=True)

In [49]:
la_sensors_gdf.head()

Unnamed: 0,sensors_id,station_id,pollutant,name,datetime_first.utc,datetime_last.utc,timezone,is_mobile,is_monitor,bounds,...,country.name,owner.id,owner.name,provider.id,provider.name,coordinates.latitude,coordinates.longitude,instrument_name,instrument_id,geometry
0,1502,847,PM2.5,South Long Beach,2016-03-06 20:00:00+00:00,2022-05-05 22:00:00+00:00,America/Los_Angeles,False,True,"[-118.175278, 33.792221, -118.175278, 33.792221]",...,United States,4,Unknown Governmental Organization,119,AirNow,33.792221,-118.175278,Government Monitor,2,POINT (-118.17528 33.79222)
1,1837,1019,O₃,Pasadena,2016-03-06 20:00:00+00:00,2016-11-09 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.1081, 34.0833, -118.1081, 34.0833]",...,United States,4,Unknown Governmental Organization,119,AirNow,34.0833,-118.1081,Government Monitor,2,POINT (-118.1081 34.0833)
2,1866,1036,O₃,Pico Rivera,2016-03-06 20:00:00+00:00,2016-11-09 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.059196, 34.0131, -118.059196, 34.0131]",...,United States,4,Unknown Governmental Organization,119,AirNow,34.0131,-118.059196,Government Monitor,2,POINT (-118.0592 34.0131)
3,1876,1042,O₃,Piru - Pacific,2016-03-06 20:00:00+00:00,2020-06-10 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.81, 34.4044, -118.81, 34.4044]",...,United States,4,Unknown Governmental Organization,119,AirNow,34.4044,-118.81,Government Monitor,2,POINT (-118.81 34.4044)
4,22301,1042,PM2.5,Piru - Pacific,2016-03-06 20:00:00+00:00,2020-06-10 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.81, 34.4044, -118.81, 34.4044]",...,United States,4,Unknown Governmental Organization,119,AirNow,34.4044,-118.81,Government Monitor,2,POINT (-118.81 34.4044)


# Moves sensors_id out of the columns and into the index, in place.
# NOTE(review): a later cell reads la_sensors_gdf['sensors_id'] as a column, which no longer
# exists after this runs; re-running this cell also raises KeyError for the same reason.
la_sensors_gdf.set_index('sensors_id',inplace=True)

In [4]:
la_sensors_gdf.head()

Unnamed: 0_level_0,station_id,pollutant,name,datetime_first.utc,datetime_last.utc,timezone,is_mobile,is_monitor,bounds,country.id,...,country.name,owner.id,owner.name,provider.id,provider.name,coordinates.latitude,coordinates.longitude,instrument_name,instrument_id,geometry
sensors_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1502,847,PM2.5,South Long Beach,2016-03-06 20:00:00+00:00,2022-05-05 22:00:00+00:00,America/Los_Angeles,False,True,"[-118.175278, 33.792221, -118.175278, 33.792221]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,33.792221,-118.175278,Government Monitor,2,POINT (-118.17528 33.79222)
1837,1019,O₃,Pasadena,2016-03-06 20:00:00+00:00,2016-11-09 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.1081, 34.0833, -118.1081, 34.0833]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.0833,-118.1081,Government Monitor,2,POINT (-118.1081 34.0833)
1866,1036,O₃,Pico Rivera,2016-03-06 20:00:00+00:00,2016-11-09 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.059196, 34.0131, -118.059196, 34.0131]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.0131,-118.059196,Government Monitor,2,POINT (-118.0592 34.0131)
1876,1042,O₃,Piru - Pacific,2016-03-06 20:00:00+00:00,2020-06-10 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.81, 34.4044, -118.81, 34.4044]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.4044,-118.81,Government Monitor,2,POINT (-118.81 34.4044)
22301,1042,PM2.5,Piru - Pacific,2016-03-06 20:00:00+00:00,2020-06-10 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.81, 34.4044, -118.81, 34.4044]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.4044,-118.81,Government Monitor,2,POINT (-118.81 34.4044)


In [5]:
la_sensors_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1971 entries, 1502 to 14877261
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype              
---  ------                 --------------  -----              
 0   station_id             1971 non-null   int32              
 1   pollutant              1971 non-null   object             
 2   name                   1971 non-null   object             
 3   datetime_first.utc     1971 non-null   datetime64[ms, UTC]
 4   datetime_last.utc      1971 non-null   datetime64[ms, UTC]
 5   timezone               1971 non-null   object             
 6   is_mobile              1971 non-null   bool               
 7   is_monitor             1971 non-null   bool               
 8   bounds                 1971 non-null   object             
 9   country.id             1971 non-null   int32              
 10  country.code           1971 non-null   object             
 11  country.name           1971 non-null   object 

### Manually pulling from S3

In [5]:
def download_file(url, save_path, chunk_size=1024 * 1024, timeout=60):
    """Download `url` to `save_path`, streaming the body to disk in chunks.

    Parameters
    ----------
    url : str
        HTTP(S) address of the file to fetch.
    save_path : str or path-like
        Destination file; it is opened in binary write mode and overwritten.
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming.
    timeout : float, optional
        Seconds passed to `requests.get` so a stalled server cannot hang the
        notebook indefinitely.

    Returns
    -------
    None
        The outcome is reported by printing '<url> downloaded' or
        '<url> failed' (any status code other than 200).
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f'{url} failed')
            return

        with open(save_path, 'wb') as file:
            # iter_content keeps memory use bounded; response.content would
            # load the whole body into memory and defeat stream=True.
            for chunk in response.iter_content(chunk_size=chunk_size):
                file.write(chunk)

    print(f'{url} downloaded')

In [6]:
url = "https://openaq-data-archive.s3.amazonaws.com/records/csv.gz/locationid=1200/year=2025/month=10/location-1200-20251010.csv.gz"

In [7]:
download_file(url,"../../Data/sensor_data/station_1200.csv.gz")

https://openaq-data-archive.s3.amazonaws.com/records/csv.gz/locationid=1200/year=2025/month=10/location-1200-20251010.csv.gz downloaded


In [8]:
station_1200 = pd.read_csv('../../Data/sensor_data/station_1200.csv.gz')

In [9]:
station_1200.head()

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,parameter,units,value
0,1200,2152,Glendora - Laurel-1200,2025-10-10T01:00:00-07:00,34.1439,-117.8508,pm10,µg/m³,17.0
1,1200,2152,Glendora - Laurel-1200,2025-10-10T02:00:00-07:00,34.1439,-117.8508,pm10,µg/m³,26.0
2,1200,2152,Glendora - Laurel-1200,2025-10-10T03:00:00-07:00,34.1439,-117.8508,pm10,µg/m³,19.0
3,1200,2152,Glendora - Laurel-1200,2025-10-10T04:00:00-07:00,34.1439,-117.8508,pm10,µg/m³,24.0
4,1200,2152,Glendora - Laurel-1200,2025-10-10T05:00:00-07:00,34.1439,-117.8508,pm10,µg/m³,23.0


In [None]:
station_1200.columns

In [24]:
athena_cols = station_1200.columns

In [None]:
len(athena_cols)

### Setting up Athena

In [15]:
cursor = connect(
    s3_staging_dir="s3://la-openaq-athena-results-us-east-1/temp/",
    region_name="us-east-1"
).cursor()

In [None]:
# IF EXISTS keeps this cell from failing when the table has not been created yet
# (e.g. the first run against a fresh database).
cursor.execute("DROP TABLE IF EXISTS openaq_db.openaqMeasurements;")

In [None]:
# DDL for an external table over the public OpenAQ archive bucket.
# The nine data columns follow the CSV header seen in the manually downloaded file
# (location_id ... value); locationid / year / month are partition columns.
# IF NOT EXISTS makes the statement safe to re-run.
# NOTE(review): 'serialization.format' and 'field.delim' look like LazySimpleSerDe settings;
# OpenCSVSerde is usually configured via separatorChar / quoteChar / escapeChar — confirm
# these properties are actually honoured (the comma default may be what makes this work).
create_table = """
CREATE EXTERNAL TABLE IF NOT EXISTS openaq_db.openaqMeasurements (
  location_id INT,
  sensors_id INT,
  location STRING,
  datetime STRING,
  lat DOUBLE,
  lon DOUBLE,
  parameter STRING,
  units STRING,
  value DOUBLE
)
PARTITIONED BY (locationid STRING, year STRING, month STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES ('serialization.format'=',','field.delim'=',')
LOCATION 's3://openaq-data-archive/records/csv.gz/'
TBLPROPERTIES ('skip.header.line.count'='1');
"""
cursor.execute(create_table)

In [None]:
cursor.execute("SHOW PARTITIONS openaq_db.openaqMeasurements;")
print(cursor.fetchall())

In [None]:
station_IDs = la_sensors_gdf['station_id']

In [None]:
list(la_sensors_gdf['station_id'])[0]

In [None]:
str(dt.datetime.today().month).zfill(2)

In [None]:
dt.datetime.today().year

I need to fix this logic: it technically works, but returns a "not defined" error, ironically because the partitioning produced no errors.

### Partitioning

In [None]:
def add_station_partitions(station=None, year=None, month=None):
    """Register one Athena partition per station for a single year/month.

    Parameters
    ----------
    station : optional
        Currently unused: the loop always covers every station id found in
        ``la_sensors_gdf['station_id']``. Accepted only so existing calls
        keep working.
    year : int or str, optional
        Partition year. Defaults to the current year, resolved at call time.
    month : int or str, optional
        Partition month; zero-padded to two digits. Defaults to the current
        month, resolved at call time.

    Returns
    -------
    list of str
        One message per station whose ALTER TABLE raised OperationalError;
        empty when every statement succeeded.
    """
    # Resolve defaults here rather than in the signature, where they would be
    # frozen at definition time and go stale in a long-lived kernel.
    today = dt.datetime.today()
    year = today.year if year is None else year
    month = str(today.month if month is None else month).zfill(2)

    # la_sensors_gdf holds one row per sensor, so station ids repeat; submit
    # each station once (first-seen order) instead of re-adding a partition.
    locations = list(dict.fromkeys(la_sensors_gdf['station_id']))

    errors = []
    for location in locations:
        try:
            cursor.execute(f"""
            ALTER TABLE openaq_db.openaqMeasurements ADD
            PARTITION (year='{year}', month='{month}', locationid='{location}')
            LOCATION 's3://openaq-data-archive/records/csv.gz/locationid={location}/year={year}/month={month}/';
            """)
        except OperationalError as e:
            # append (not extend): extend would split the message into characters.
            errors.append(
                f'OperationalError {e} for year: {year}, month: {month}, station: {location}'
            )
    return errors

In [None]:
station_partitions_error_log = add_station_partitions(year='2025',month='12')

In [None]:
# Name must match the variable assigned from add_station_partitions (…partitions_error_log).
station_partitions_error_log[:10]

In [None]:
cursor.execute("SHOW PARTITIONS openaq_db.openaqMeasurements;")
openaq_partitons = cursor.fetchall()

In [None]:
openaq_partitons[:20]

In [None]:
openaq_partitons[-20:]

### Query practice

In [None]:
cols = ['location_id','sensors_id','location','datetime','lat','lon','parameter','units','value']

In [None]:
# Filter on the partition column (locationid) as well as the data column so Athena
# only reads files under locationid=1200/ rather than every registered partition.
cursor.execute("""
SELECT * 
FROM openaq_db.openaqMeasurements 
WHERE locationid = '1200'
  AND location_id = 1200
ORDER BY datetime DESC;
""")
station_1200 = cursor.fetchall()

In [None]:
station_1200[0]

In [None]:
# SELECT * returns the three partition columns (locationid, year, month) after the nine
# data columns, so keep only as many fields per row as there are column names.
station_1200_athena_df = pd.DataFrame(
    data=[row[:len(athena_cols)] for row in station_1200],
    columns=athena_cols,
)

In [None]:
station_1200_athena_df.head()

In [None]:
station_1200_athena_df.info()

Since we queried a single day, we have 24 (hourly) entries - for each pollutant

In [None]:
station_1200_athena_df['parameter'].value_counts()

### Querying PM2.5

I can't directly query the entire S3 bucket for PM2.5 - only where partitions are already present.

In [57]:
cursor.execute("""
SELECT * 
FROM openaq_db.openaqMeasurements 
WHERE parameter = 'pm25'
ORDER BY datetime DESC
LIMIT 10;
""")
pm25_practice = cursor.fetchall()

In [17]:
pm25_practice[0]

(3312559,
 11626125,
 'San Gabriel-3282501',
 '2025-12-12T00:00:00-08:00',
 34.099201,
 -118.107231,
 'pm25',
 'µg/m³',
 15.989500013987223,
 '3312559',
 '2025',
 '12')

In [18]:
len(pm25_practice[0])

12

In [25]:
len(athena_cols)

9

In [21]:
pm25_practice[0][:-3]

(3312559,
 11626125,
 'San Gabriel-3282501',
 '2025-12-12T00:00:00-08:00',
 34.099201,
 -118.107231,
 'pm25',
 'µg/m³',
 15.989500013987223)

The final three items in each row are the partition columns (`locationid`, `year` and `month`). They are redundant here, because the same information is already present in `location_id` and `datetime`.

In [58]:
# Keep only the data columns. Slicing to len(athena_cols) instead of [:-3] makes the cell
# safe to re-run: rows that were already trimmed are left unchanged.
pm25_practice = [row[:len(athena_cols)] for row in pm25_practice]

In [59]:
pm25_practice_df = pd.DataFrame(data=pm25_practice,columns=athena_cols)

In [60]:
pm25_practice_df.head()

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,parameter,units,value
0,1716811,7217298,"Marco Triangle Park, Venice, California-1686784",2025-12-12T00:00:00-08:00,33.991524,-118.459939,pm25,µg/m³,36.1125
1,2905670,9660619,Story Park-2875630,2025-12-12T00:00:00-08:00,34.100018,-118.125515,pm25,µg/m³,21.775
2,26780,9636796,Thousand Oaks - Moor-2872352,2025-12-12T00:00:00-08:00,34.21017,-118.87051,pm25,µg/m³,4.0
3,1519816,6876410,East Irvine - Northwood-1489792,2025-12-12T00:00:00-08:00,33.716054,-117.74926,pm25,µg/m³,12.260417
4,2138,3842,Reseda-2138,2025-12-12T00:00:00-08:00,34.1992,-118.5331,pm25,µg/m³,14.8


In [None]:
pm25_practice_df.info()

In [None]:
pm25_practice_df.sort_values(by = 'datetime',ascending=True).head()

In [None]:
pm25_practice_df.sort_values(by = 'datetime',ascending=False).head()

There are only 9 days present here and we already have 380,000 rows.

# One row per sensor for the current calendar month: ROW_NUMBER numbers each sensor's rows
# by datetime descending and the outer filter keeps rn = 1.
# NOTE(review): sensor_IDs is not defined in any cell shown here (station_IDs and sensor_ids
# are) — confirm which name was intended.
# NOTE(review): formatting a Python tuple into SQL yields '(x,)' for a single id, which is
# not valid SQL; a ",".join(...) of the ids would avoid that.
cursor.execute(f"""
SELECT *
FROM (
    SELECT *,
           ROW_NUMBER() OVER (
               PARTITION BY sensors_id
               ORDER BY datetime DESC
           ) AS rn
    FROM openaq_db.openaqMeasurements
    WHERE year = '{dt.datetime.today().year}'
      AND month = '{str(dt.datetime.today().month).zfill(2)}'
      AND sensors_id IN {tuple(sensor_IDs)}
) sub
WHERE rn = 1
ORDER BY datetime DESC;
"""
)
records = cursor.fetchall()

In [None]:
records

### Partitioning all PM2.5

I need to modify the function below and check it against the existing (or intended) partition scheme, i.e. the S3 URL layout. Depending on that layout, I can iterate over either months or stations. I'll then need further functions to iterate through each station in each month in each year to complete the partitioning scheme.

In [11]:
station_ids = list(set(la_sensors_gdf['station_id']))

In [15]:
def add_station_partitions(stations, year, month):
    """Register one Athena partition per station for the given year and month.

    Parameters
    ----------
    stations : iterable
        Station (location) ids; each becomes the `locationid` partition value.
    year : int or str
        Partition year, used verbatim in the partition spec and S3 path.
    month : int or str
        Partition month; zero-padded to two digits so that 3 and '03' both
        target the `month=03/` prefix.

    Returns
    -------
    None
        Failures are best-effort: an OperationalError for one station is
        printed and the loop moves on to the next station.
    """
    # Normalise here so a direct call with an int month cannot register a
    # partition such as month='3' that points at a non-existent S3 prefix.
    month = str(month).zfill(2)

    for location in stations:
        try:
            cursor.execute(f"""
            ALTER TABLE openaq_db.openaqMeasurements ADD
            PARTITION (year='{year}', month='{month}', locationid='{location}')
            LOCATION 's3://openaq-data-archive/records/csv.gz/locationid={location}/year={year}/month={month}/';
            """)
        except OperationalError as e:
            print(f'OperationalError {e} for year: {year}, month: {month}, station: {location}')

In [13]:
def add_month_partitions(stations, year):
    """Register partitions for every station in each of the 12 months of `year`.

    Parameters
    ----------
    stations : iterable
        Station (location) ids, forwarded to `add_station_partitions`.
    year : int or str
        Partition year, forwarded unchanged.

    Returns
    -------
    None
        An OperationalError escaping a month is printed and the remaining
        months are still attempted.
    """
    for month_number in range(1, 13):
        month = str(month_number).zfill(2)
        try:
            add_station_partitions(stations=stations, year=year, month=month)
        except OperationalError as e:
            # No single station is in scope at this level, so report only year and
            # month (the original message referenced an undefined `station` name).
            print(f'OperationalError {e} for year: {year}, month: {month}')
            continue

In [18]:
def add_year_partitions(stations):
    """Register partitions for every station and month of each year 2016-2026.

    Parameters
    ----------
    stations : iterable
        Station (location) ids, forwarded to `add_month_partitions`.

    Returns
    -------
    None
        An OperationalError escaping a year is printed and the remaining
        years are still attempted.
    """
    # range end is exclusive: the last year processed is 2026.
    for year in range(2016, 2027):
        try:
            add_month_partitions(stations=stations, year=year)
        except OperationalError as e:
            print(f'OperationalError {e} for year: {year}')

In [None]:
add_month_partitions(station_ids, 2024)

add_month_partitions(station_ids, 2024)

In [None]:
cursor.execute("SHOW PARTITIONS openaq_db.openaqMeasurements;")
partitions = cursor.fetchall()

In [None]:
len(partitions)

In [None]:
# NOTE(review): cells below this one still call cursor.execute (directly and through the
# partition helpers); re-run the connect cell before them, or move this close to the end.
cursor.close()

I need to research and better understand the cost implications of Athena before running this:

Running for just 2025 should be a less-drastic test case. We can examine the costs and data usage there and extrapolate.

In [18]:
add_year_partitions(station_ids)

KeyboardInterrupt: 

In [None]:
cursor.execute("SHOW PARTITIONS openaq_db.openaqMeasurements;")
partitions = cursor.fetchall()

In [None]:
len(partitions)

A possibly mildly improved version:

In [None]:
def add_year_partitions(stations=None, start_year=2016, end_year=2027):
    """Register partitions for every station and month from start_year to end_year.

    Parameters
    ----------
    stations : iterable, optional
        Station (location) ids. When omitted, the module-level `station_ids`
        is looked up at call time, so the current list is always used.
    start_year : int or str, optional
        First year to process.
    end_year : int or str, optional
        Last year to process (inclusive).

    Returns
    -------
    None
        An OperationalError escaping a year is printed and the remaining
        years are still attempted.
    """
    # Resolve the default lazily: a `stations=station_ids` default is bound when the
    # def cell runs, which fails on a fresh kernel and goes stale if the list changes.
    if stations is None:
        stations = station_ids

    first_year = int(start_year)
    last_year = int(end_year)

    # range end is exclusive, hence + 1 to include end_year itself.
    for year in range(first_year, last_year + 1):
        try:
            add_month_partitions(stations=stations, year=year)
        except OperationalError as e:
            print(f'OperationalError {e} for year: {year}')

In [None]:
print(range(1,12))

In [None]:
len(range(1,13))

### Query optimizing

In [10]:
la_sensors_gdf.head()

Unnamed: 0_level_0,station_id,pollutant,name,datetime_first.utc,datetime_last.utc,timezone,is_mobile,is_monitor,bounds,country.id,...,country.name,owner.id,owner.name,provider.id,provider.name,coordinates.latitude,coordinates.longitude,instrument_name,instrument_id,geometry
sensors_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1502,847,PM2.5,South Long Beach,2016-03-06 20:00:00+00:00,2022-05-05 22:00:00+00:00,America/Los_Angeles,False,True,"[-118.175278, 33.792221, -118.175278, 33.792221]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,33.792221,-118.175278,Government Monitor,2,POINT (-118.17528 33.79222)
1837,1019,O₃,Pasadena,2016-03-06 20:00:00+00:00,2016-11-09 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.1081, 34.0833, -118.1081, 34.0833]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.0833,-118.1081,Government Monitor,2,POINT (-118.1081 34.0833)
1866,1036,O₃,Pico Rivera,2016-03-06 20:00:00+00:00,2016-11-09 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.059196, 34.0131, -118.059196, 34.0131]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.0131,-118.059196,Government Monitor,2,POINT (-118.0592 34.0131)
1876,1042,O₃,Piru - Pacific,2016-03-06 20:00:00+00:00,2020-06-10 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.81, 34.4044, -118.81, 34.4044]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.4044,-118.81,Government Monitor,2,POINT (-118.81 34.4044)
22301,1042,PM2.5,Piru - Pacific,2016-03-06 20:00:00+00:00,2020-06-10 21:00:00+00:00,America/Los_Angeles,False,True,"[-118.81, 34.4044, -118.81, 34.4044]",155,...,United States,4,Unknown Governmental Organization,119,AirNow,34.4044,-118.81,Government Monitor,2,POINT (-118.81 34.4044)


In [61]:
pm25_practice_df.head()

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,parameter,units,value
0,1716811,7217298,"Marco Triangle Park, Venice, California-1686784",2025-12-12T00:00:00-08:00,33.991524,-118.459939,pm25,µg/m³,36.1125
1,2905670,9660619,Story Park-2875630,2025-12-12T00:00:00-08:00,34.100018,-118.125515,pm25,µg/m³,21.775
2,26780,9636796,Thousand Oaks - Moor-2872352,2025-12-12T00:00:00-08:00,34.21017,-118.87051,pm25,µg/m³,4.0
3,1519816,6876410,East Irvine - Northwood-1489792,2025-12-12T00:00:00-08:00,33.716054,-117.74926,pm25,µg/m³,12.260417
4,2138,3842,Reseda-2138,2025-12-12T00:00:00-08:00,34.1992,-118.5331,pm25,µg/m³,14.8


In [32]:
sensor_ids = la_sensors_gdf.index

In [62]:
sensor_ids = list(la_sensors_gdf['sensors_id'])

In [63]:
sensor_ids[:20]

[1502,
 1837,
 1866,
 1876,
 22301,
 25500,
 4272146,
 25501,
 4272073,
 1888,
 24900,
 4272387,
 24901,
 4272396,
 2154,
 2152,
 2150,
 2244,
 2274,
 2554]

In [65]:
len(set(la_sensors_gdf['location_id']))

432

In [44]:
len(sensor_ids)

1971

In [71]:
sensor_ids[:10]

[1502, 1837, 1866, 1876, 22301, 25500, 4272146, 25501, 4272073, 1888]

In [72]:
ids_sql = ",".join(str(i) for i in sensor_ids)

In [82]:
ids_sql[:20]

'1502,1837,1866,1876,'

len(la_sensors_gdf['location_id'])

In [83]:
cursor.execute(f"""
SELECT * 
FROM openaq_db.openaqMeasurements 
WHERE sensors_id IN ({ids_sql})
ORDER BY datetime DESC
LIMIT 100;
""")
query_practice2 = cursor.fetchall()

In [84]:
# Keep only the data columns. Slicing to len(athena_cols) instead of [:-3] makes the cell
# safe to re-run: rows that were already trimmed are left unchanged.
query_practice2 = [row[:len(athena_cols)] for row in query_practice2]
query_practice2_df = pd.DataFrame(data=query_practice2,columns=athena_cols)

In [86]:
query_practice2_df.head()

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,parameter,units,value
0,3293942,11489156,Yorba Linda-3263887,2025-12-12T00:00:00-08:00,33.88674,-117.70468,pm25,µg/m³,34.5605
1,3293942,11489160,Yorba Linda-3263887,2025-12-12T00:00:00-08:00,33.88674,-117.70468,pm1,µg/m³,22.955167
2,3293942,11489166,Yorba Linda-3263887,2025-12-12T00:00:00-08:00,33.88674,-117.70468,temperature,c,18.553833
3,3293942,11489150,Yorba Linda-3263887,2025-12-12T00:00:00-08:00,33.88674,-117.70468,relativehumidity,%,46.8915
4,3273175,11333890,Yorba Linda Lakebed Park-3243120,2025-12-12T00:00:00-08:00,33.87807,-117.81213,um003,particles/cm³,762.502336


I expected this to only return PM2.5 readings. I think I didn't trim the dataset in the first place.

### New

In [77]:
cursor.execute(f"""
SELECT * 
FROM openaq_db.openaqMeasurements 
ORDER BY datetime DESC
LIMIT 5;
""")
query_practice = cursor.fetchall()

In [78]:
# Keep only the data columns. Slicing to len(athena_cols) instead of [:-3] makes the cell
# safe to re-run: rows that were already trimmed are left unchanged.
query_practice = [row[:len(athena_cols)] for row in query_practice]
query_practice_df = pd.DataFrame(data=query_practice,columns=athena_cols)

In [81]:
query_practice_df.head()

Unnamed: 0,location_id,sensors_id,location,datetime,lat,lon,parameter,units,value
0,1052,25500,Pomona-1052,2025-12-12T00:00:00-08:00,34.0667,-117.7506,co,ppm,0.7
1,1052,25501,Pomona-1052,2025-12-12T00:00:00-08:00,34.0667,-117.7506,no2,ppm,0.0354
2,1052,1888,Pomona-1052,2025-12-12T00:00:00-08:00,34.0667,-117.7506,o3,ppm,0.001
3,1052,4272146,Pomona-1052,2025-12-12T00:00:00-08:00,34.0667,-117.7506,no,ppm,0.0289
4,1052,4272073,Pomona-1052,2025-12-12T00:00:00-08:00,34.0667,-117.7506,nox,ppm,0.0643
