This notebook takes OMI HDF5 (`.he5`) files and aggregates them to Colorado tract and county boundaries.


STEP 1 - Convert HDF5 to GeoTIFF using GDAL


STEP 1a - run gdalinfo to list the datasets contained in the HDF5 file

gdalinfo OMI-Aura_L3-OMDOAO3e_20160516-20160820.he5


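STEP 1b - run gdal_translate (shown here for NO2; repeat for the ozone file, using the dataset path reported by gdalinfo in Step 1a, to create the ozone GeoTIFF used in Step 2)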

gdal_translate -of GTiff -a_srs EPSG:4326 -a_ullr -180 -90 180 90 HDF5:"OMI-Aura_L3-OMNO2d_20160516-20160820.he5"://HDFEOS/GRIDS/ColumnAmountNO2/Data_Fields/ColumnAmountNO2 output_no2.tif

STEP 2 - Zonal Statistics
In this step you will compute zonal statistics and write the results to .csv files.

In [1]:
# set up for zonal stats
import geopandas as gpd
import pandas as pd
import rasterio
from exactextract import exact_extract
from shapely.geometry import mapping

# Input polygon layers used by every zonal-statistics cell below.
# CHANGE these two paths to point to your own shapefiles.
shapeTract = "/glade/derecho/scratch/boehnert/AQE/shapefile/Colorado_tracts.shp"  # Colorado tracts
shapeCounty = "/glade/derecho/scratch/boehnert/AQE/shapefile/Colorado_Counties.shp"  # Colorado counties

Zonal Stats for Colorado Tracts

In [2]:
# NO2
# TRACTS Aggregation
#
# Computes zonal statistics (count, mean, median, stdev) of the NO2 GeoTIFF
# for every tract polygon and writes them, with FIPS and SQKM, to a CSV file.

# Load the tract shapefile
tracts = gpd.read_file(shapeTract)
# CHANGE this to point to the GeoTIFF you created in Step 1
rast = r"/glade/derecho/scratch/boehnert/AQE/OMI/output_no2.tif"
# Output CSV
out = r"/glade/derecho/scratch/boehnert/AQE/output/Omi_tract_no2_2016.csv"

# Open the NO2 raster only to read its CRS; exact_extract reads the pixel
# values itself from the path.
with rasterio.open(rast) as src:
    raster_crs = src.crs

# Match the polygon CRS to the raster CRS so both are in the same coordinate
# system before extraction. Skipped when either CRS is undefined, because
# there is then nothing to transform from/to.
if tracts.crs is not None and raster_crs is not None:
    tracts = tracts.to_crs(raster_crs)

# Compute all statistics in a single pass over the raster instead of one
# pass per statistic.
stats = exact_extract(rast, tracts, ["count", "mean", "median", "stdev"], output="pandas")

# Check length: one result row per tract is required for the positional
# assignment below.
assert len(stats) == len(tracts), "zonal statistics do not match the number of tracts"
print(len(stats))  # 1447 for the Colorado tract shapefile

# Assign by position (to_numpy) so the result does not depend on index alignment
tracts["MEAN"] = stats["mean"].to_numpy()
tracts["MEDIAN"] = stats["median"].to_numpy()
tracts["STD"] = stats["stdev"].to_numpy()
tracts["COUNT"] = stats["count"].to_numpy()

# Export desired fields
tracts[["FIPS", "SQKM", "COUNT", "MEAN", "MEDIAN", "STD"]].to_csv(out, index=False)
print(f"✅ CSV file created: {out}")

1447
✅ CSV file created: /glade/derecho/scratch/boehnert/AQE/output/Omi_tract_no2_2016.csv


Zonal Stats for Colorado Counties

In [3]:
# NO2
# COUNTIES Aggregation
#
# Computes zonal statistics (count, mean, median, stdev) of the NO2 GeoTIFF
# for every county polygon and writes them, with FIPS and SQKM, to a CSV file.

# Load the county shapefile
county = gpd.read_file(shapeCounty)
# CHANGE this to point to the GeoTIFF you created in Step 1
rast = r"/glade/derecho/scratch/boehnert/AQE/OMI/output_no2.tif"
# Output CSV
out = r"/glade/derecho/scratch/boehnert/AQE/output/Omi_county_no2_2016.csv"

# Open the NO2 raster only to read its CRS; exact_extract reads the pixel
# values itself from the path.
with rasterio.open(rast) as src:
    raster_crs = src.crs

# Match the polygon CRS to the raster CRS so both are in the same coordinate
# system before extraction. Skipped when either CRS is undefined, because
# there is then nothing to transform from/to.
if county.crs is not None and raster_crs is not None:
    county = county.to_crs(raster_crs)

# Compute all statistics in a single pass over the raster instead of one
# pass per statistic.
stats = exact_extract(rast, county, ["count", "mean", "median", "stdev"], output="pandas")

# Check length: one result row per county is required for the positional
# assignment below.
assert len(stats) == len(county), "zonal statistics do not match the number of counties"
print(len(stats))  # 64 for the Colorado county shapefile

# Assign by position (to_numpy) so the result does not depend on index alignment
county["MEAN"] = stats["mean"].to_numpy()
county["MEDIAN"] = stats["median"].to_numpy()
county["STD"] = stats["stdev"].to_numpy()
county["COUNT"] = stats["count"].to_numpy()

# Export desired fields
county[["FIPS", "SQKM", "COUNT", "MEAN", "MEDIAN", "STD"]].to_csv(out, index=False)
print(f"✅ CSV file created: {out}")

64
✅ CSV file created: /glade/derecho/scratch/boehnert/AQE/output/Omi_county_no2_2016.csv


In [4]:
# ozone
# TRACTS Aggregation
#
# Computes zonal statistics (count, mean, median, stdev) of the ozone GeoTIFF
# for every tract polygon and writes them, with FIPS and SQKM, to a CSV file.

# Load the tract shapefile
tracts = gpd.read_file(shapeTract)
# CHANGE this to point to the GeoTIFF you created in Step 1
rast = r"/glade/derecho/scratch/boehnert/AQE/OMI/output_ozone3.tif"
# Output CSV
out = r"/glade/derecho/scratch/boehnert/AQE/output/Omi_tract_ozone_2016.csv"

# Open the ozone raster only to read its CRS; exact_extract reads the pixel
# values itself from the path.
with rasterio.open(rast) as src:
    raster_crs = src.crs

# Match the polygon CRS to the raster CRS so both are in the same coordinate
# system before extraction. Skipped when either CRS is undefined, because
# there is then nothing to transform from/to.
if tracts.crs is not None and raster_crs is not None:
    tracts = tracts.to_crs(raster_crs)

# Compute all statistics in a single pass over the raster instead of one
# pass per statistic.
stats = exact_extract(rast, tracts, ["count", "mean", "median", "stdev"], output="pandas")

# Check length: one result row per tract is required for the positional
# assignment below.
assert len(stats) == len(tracts), "zonal statistics do not match the number of tracts"
print(len(stats))  # 1447 for the Colorado tract shapefile

# Assign by position (to_numpy) so the result does not depend on index alignment
tracts["MEAN"] = stats["mean"].to_numpy()
tracts["MEDIAN"] = stats["median"].to_numpy()
tracts["STD"] = stats["stdev"].to_numpy()
tracts["COUNT"] = stats["count"].to_numpy()

# Export desired fields
tracts[["FIPS", "SQKM", "COUNT", "MEAN", "MEDIAN", "STD"]].to_csv(out, index=False)
print(f"✅ CSV file created: {out}")

1447
✅ CSV file created: /glade/derecho/scratch/boehnert/AQE/output/Omi_tract_ozone_2016.csv


In [6]:
# ozone
# COUNTIES Aggregation
#
# Computes zonal statistics (count, mean, median, stdev) of the ozone GeoTIFF
# for every county polygon and writes them, with FIPS and SQKM, to a CSV file.

# Load the county shapefile
county = gpd.read_file(shapeCounty)
# CHANGE this to point to the GeoTIFF you created in Step 1
rast = r"/glade/derecho/scratch/boehnert/AQE/OMI/output_ozone3.tif"
# Output CSV
out = r"/glade/derecho/scratch/boehnert/AQE/output/Omi_county_ozone_2016.csv"

# Open the ozone raster only to read its CRS; exact_extract reads the pixel
# values itself from the path.
with rasterio.open(rast) as src:
    raster_crs = src.crs

# Match the polygon CRS to the raster CRS so both are in the same coordinate
# system before extraction. Skipped when either CRS is undefined, because
# there is then nothing to transform from/to.
if county.crs is not None and raster_crs is not None:
    county = county.to_crs(raster_crs)

# Compute all statistics in a single pass over the raster instead of one
# pass per statistic.
stats = exact_extract(rast, county, ["count", "mean", "median", "stdev"], output="pandas")

# Check length: one result row per county is required for the positional
# assignment below.
assert len(stats) == len(county), "zonal statistics do not match the number of counties"
print(len(stats))  # 64 for the Colorado county shapefile

# Assign by position (to_numpy) so the result does not depend on index alignment
county["MEAN"] = stats["mean"].to_numpy()
county["MEDIAN"] = stats["median"].to_numpy()
county["STD"] = stats["stdev"].to_numpy()
county["COUNT"] = stats["count"].to_numpy()

# Export desired fields
county[["FIPS", "SQKM", "COUNT", "MEAN", "MEDIAN", "STD"]].to_csv(out, index=False)
print(f"✅ CSV file created: {out}")

64
✅ CSV file created: /glade/derecho/scratch/boehnert/AQE/output/Omi_county_ozone_2016.csv
