This notebook takes TROPOMI netCDF files and aggregates them to Colorado census tract and county boundaries.


STEP 1 - convert netCDF to geotiff
In this first step you will create a GeoTIFF from your netCDF file.

In [2]:
# this works for TropOmi
# convert netCDF to geotiff

import numpy as np
import rasterio
import xarray as xr
from rasterio.transform import from_origin

# --- Input / output locations ---
#
# Change this based on where your netCDF is stored
# NOTE(review): the recorded output of this cell shows xarray failing to pick
# an IO backend for this path — confirm it points at the netCDF file itself.
#
netcdf_path = r"/glade/derecho/scratch/boehnert/AQE/tropOmi/2027 Presidential AI Challenge"
#
# change this to where you want your geotiff to be stored
#
tif_path = r"/glade/derecho/scratch/boehnert/AQE/rasters/tropomi_n02_2022.tif"

# Choose your variable — adjust if needed (the print below lists what is available)
var_name = "Tropospheric_NO2"

# Open the dataset in a context manager so the file handle is always released,
# and pull everything we need into memory before it closes.
with xr.open_dataset(netcdf_path) as ds:
    # --- Inspect available variables ---
    print(ds.data_vars)

    # Read variable and coordinates
    no2 = ds[var_name][:, :].load()
    lat = ds["Latitude"][:].load()
    lon = ds["Longitude"][:].load()

# Make sure dimensions match
print("NO2 shape:", no2.shape)
print("Lat size:", len(lat))
print("Lon size:", len(lon))

# The georeferencing below treats rows as latitude and columns as longitude,
# so fail loudly instead of writing a mis-registered raster.
if no2.shape != (len(lat), len(lon)):
    raise ValueError(
        f"{var_name} has shape {no2.shape}, expected (lat, lon) = "
        f"({len(lat)}, {len(lon)})"
    )
if len(lat) < 2 or len(lon) < 2:
    raise ValueError("Need at least two latitude and two longitude values to derive the cell size")

# Work with plain NumPy arrays / Python floats from here on so the affine
# transform is not built from 0-d xarray objects.
lat_vals = np.asarray(lat.values, dtype="float64")
lon_vals = np.asarray(lon.values, dtype="float64")
no2_north_up = no2.values

# --- Orient data to GeoTIFF convention (row 0 = north, column 0 = west) ---
# Only flip when the coordinate axis actually runs the "wrong" way; flipping
# unconditionally would turn an already north-first grid upside down.
if lat_vals[0] < lat_vals[-1]:
    no2_north_up = np.flipud(no2_north_up)
if lon_vals[0] > lon_vals[-1]:
    no2_north_up = np.fliplr(no2_north_up)

# --- Define transform (top-left corner = NW) ---
# Coordinates are treated as cell centres, hence the half-cell offsets.
res_lon = float(abs(lon_vals[1] - lon_vals[0]))
res_lat = float(abs(lat_vals[1] - lat_vals[0]))

transform = from_origin(
    west=float(lon_vals.min()) - res_lon / 2,
    north=float(lat_vals.max()) + res_lat / 2,
    xsize=res_lon,
    ysize=res_lat,
)

# --- Handle fill values ---
# NaN / inf cells are masked and written out as the nodata value.
no2_masked = np.ma.masked_invalid(no2_north_up)
nodata_val = -9999

# --- Save as GeoTIFF ---
raster_meta = {
    "driver": "GTiff",
    "height": no2_masked.shape[0],
    "width": no2_masked.shape[1],
    "count": 1,
    "dtype": "float32",
    "crs": "EPSG:4326",
    "transform": transform,
    "nodata": nodata_val,
}

with rasterio.open(tif_path, "w", **raster_meta) as dst:
    dst.write(no2_masked.filled(nodata_val).astype("float32"), 1)

print(f"✅ Corrected GeoTIFF saved: {tif_path}")




ValueError: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy', 'rasterio', 'zarr']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
http://xarray.pydata.org/en/stable/getting-started-guide/installing.html
http://xarray.pydata.org/en/stable/user-guide/io.html

STEP 2 - Zonal Statistics
In this step you will compute zonal statistics and write the results to .csv files.

In [None]:
# set up for zonal stats
import geopandas as gpd
import pandas as pd
import rasterio
from exactextract import exact_extract
from shapely.geometry import mapping

# Boundary shapefiles that the zonal-statistics cells read with geopandas.
# CHANGE this to point to your shapefiles
shapeTract = r"/glade/derecho/scratch/boehnert/AQE/shapefile/Colorado_tracts.shp"
shapeCounty = r"/glade/derecho/scratch/boehnert/AQE/shapefile/Colorado_Counties.shp"

Zonal Stats for Colorado Tracts

In [None]:
# NO2
# zonal stats to tracts

# Load the tract polygons
tracts = gpd.read_file(shapeTract)
# CHANGE this to point to your geotiff you created in Step 1
#
rast = r"/glade/derecho/scratch/boehnert/AQE/rasters/tropomi_n02_2022.tif"
# Output CSV
# Change this to be where you want to store the CSV
#
out = r"/glade/derecho/scratch/boehnert/AQE/output/tropOmi_tract_no2_2022.csv"

# Read the raster's CRS so the polygons can be put in the same coordinate system.
with rasterio.open(rast) as src:
    raster_crs = src.crs

# Reproject a copy for the extraction only; `tracts` keeps its original CRS.
# If either side has no CRS recorded, the polygons are used as-is.
zones = tracts
if raster_crs is not None and tracts.crs is not None:
    zones = tracts.to_crs(raster_crs.to_wkt())

# One pass over the raster for all coverage-weighted statistics
# (one output row per input polygon, in input order).
stats = exact_extract(
    rast, zones, ["mean", "median", "stdev", "count"], output="pandas"
)

# Check length
print(len(stats))  # Should be 1447 (one row per tract)
if len(stats) != len(tracts):
    raise ValueError(
        f"Zonal stats returned {len(stats)} rows for {len(tracts)} tracts"
    )

# Assign positionally (to_numpy) so the result does not depend on the two
# frames sharing an index.
tracts["MEAN"] = stats["mean"].to_numpy()
tracts["MEDIAN"] = stats["median"].to_numpy()
tracts["STD"] = stats["stdev"].to_numpy()
tracts["COUNT"] = stats["count"].to_numpy()

# Export desired fields
tracts[["FIPS", "SQKM", "COUNT", "MEAN", "MEDIAN", "STD"]].to_csv(out, index=False)
print(f"✅ CSV file created: {out}")

Zonal Stats for County

In [None]:
# NO2
# zonal stats to counties

# Load the county polygons
county = gpd.read_file(shapeCounty)
# CHANGE this to point to your geotiff you created in Step 1
#
rast = r"/glade/derecho/scratch/boehnert/AQE/rasters/tropomi_n02_2022.tif"
# Output CSV
# Change to be where you are storing the CSV
# (this must differ from the tract CSV, otherwise this cell overwrites the
# tract results)
#
out = r"/glade/derecho/scratch/boehnert/AQE/output/tropOmi_county_no2_2022.csv"

# Read the raster's CRS so the polygons can be put in the same coordinate system.
with rasterio.open(rast) as src:
    raster_crs = src.crs

# Reproject a copy for the extraction only; `county` keeps its original CRS.
# If either side has no CRS recorded, the polygons are used as-is.
zones = county
if raster_crs is not None and county.crs is not None:
    zones = county.to_crs(raster_crs.to_wkt())

# One pass over the raster for all coverage-weighted statistics
# (one output row per input polygon, in input order).
stats = exact_extract(
    rast, zones, ["mean", "median", "stdev", "count"], output="pandas"
)

# Check length
print(len(stats))  # One row per county (Colorado has 64 counties)
if len(stats) != len(county):
    raise ValueError(
        f"Zonal stats returned {len(stats)} rows for {len(county)} counties"
    )

# Assign positionally (to_numpy) so the result does not depend on the two
# frames sharing an index.
county["MEAN"] = stats["mean"].to_numpy()
county["MEDIAN"] = stats["median"].to_numpy()
county["STD"] = stats["stdev"].to_numpy()
county["COUNT"] = stats["count"].to_numpy()

# Export desired fields
county[["FIPS", "SQKM", "COUNT", "MEAN", "MEDIAN", "STD"]].to_csv(out, index=False)
print(f"✅ CSV file created: {out}")