This notebook aggregates EQUATES model output to census tracts and counties.  

STEP 1 - Write a new output netCDF that conforms to CF conventions and that contains the needed spatial metadata

In [33]:
import numpy as np
import xarray as xr
from pyproj import CRS, Proj, Transformer

# STEP 1: add projected x/y coordinates, 2-D lat/lon and a CF grid-mapping
# variable to the EQUATES file, then write the result to a new netCDF.

# ==== Input and output netCDF file - Change as needed  ====

in_nc = r"/glade/derecho/scratch/boehnert/AQE/EQUATE/HR2DAY_LST_ACONC_EQUATES_v532_12US1_2016_MDA8_SIP_avg.nc"
out_nc = r"/glade/derecho/scratch/boehnert/AQE/EQUATE/HR2DAY2_EQUATES_summer_2016_avg.nc"

# ==== OPEN ====
ds = xr.open_dataset(in_nc)


def grid_param(attr_name, fallback):
    """Return one grid-definition value, preferring the file's global attributes.

    Parameters
    ----------
    attr_name : str
        Name of the global attribute to look up in ``ds.attrs`` (I/O API
        header names such as ``XORIG`` or ``P_ALP``).
    fallback : float or int
        Value to use when the attribute is absent.

    Returns
    -------
    The attribute value as a Python scalar, or ``fallback``.

    A message is printed when the attribute is missing or when it differs
    from ``fallback``, so a mismatch between the file and the hard-coded
    grid definition is never silent.
    """
    if attr_name not in ds.attrs:
        print(f"NOTE: global attribute {attr_name} not found; using fallback {fallback}")
        return fallback
    # Attributes may come back as numpy scalars or length-1 arrays.
    value = np.asarray(ds.attrs[attr_name]).item()
    if not np.isclose(value, fallback):
        print(f"WARNING: {attr_name}={value} in the file differs from the expected {fallback}; using the file value")
    return value


# ==== GRID DEFINITION ====
# The file's global attributes are authoritative; the literals below are the
# 12US1 values and are only used when an attribute is missing.
xorig = float(grid_param("XORIG", -2556000.0))  # meters, lower-left CORNER of the grid
yorig = float(grid_param("YORIG", -1728000.0))
cellsize = float(grid_param("XCELL", 12000.0))
ncols = int(grid_param("NCOLS", 459))
nrows = int(grid_param("NROWS", 299))

# ==== PROJECTION DEFINITION (single source of truth) ====
# Previously lat/lon were computed with standard parallels 40/40 while the CF
# crs variable declared 30/60. Both the proj string and the CF attributes are
# now built from these same values.
# NOTE(review): 33/45 is the published 12US1 (LAM_40N97W) definition - confirm
# against P_ALP / P_BET in your file.
lat_1 = float(grid_param("P_ALP", 33.0))
lat_2 = float(grid_param("P_BET", 45.0))
lon_0 = float(grid_param("P_GAM", -97.0))
lat_0 = float(grid_param("YCENT", 40.0))
earth_radius = 6370000.0  # sphere radius in meters

proj4 = (
    f"+proj=lcc +a={earth_radius} +b={earth_radius} "
    f"+lat_1={lat_1} +lat_2={lat_2} +lat_0={lat_0} "
    f"+lon_0={lon_0} +x_0=0 +y_0=0 +units=m +no_defs"
)


# ---- find the row/col dim names in whatever case the file uses ----
def find_dim(candidates):
    """Return the first dimension of ``ds`` whose lower-cased name is in ``candidates``.

    Raises ValueError when no dimension matches.
    """
    for d in ds.dims:
        if d.lower() in candidates:
            return d
    raise ValueError(f"Could not find any of {candidates} in dims: {list(ds.dims)}")


col_dim = find_dim({"col", "ncols", "x"})
row_dim = find_dim({"row", "nrows", "y"})

# Fail early with a readable message instead of an obscure shape error later.
for dim_name, expected in ((col_dim, ncols), (row_dim, nrows)):
    if ds.sizes[dim_name] != expected:
        raise ValueError(
            f"Dimension {dim_name} has size {ds.sizes[dim_name]} but the grid definition expects {expected}"
        )

# ==== CREATE PROJECTED COORDINATES (cell centres) ====
# xorig/yorig are the lower-left corner of the grid, so add half a cell to get
# cell centres. STEP 2 builds the GeoTIFF transform as x.min() - dx/2, which
# is only correct when x/y are centres.
x = xorig + (np.arange(ncols) + 0.5) * cellsize
y = yorig + (np.arange(nrows) + 0.5) * cellsize

# ==== ASSIGN x/y, SWAP DIMS ====
ds = ds.assign_coords({
    "x": (col_dim, x, {"standard_name": "projection_x_coordinate", "units": "m", "axis": "X"}),
    "y": (row_dim, y, {"standard_name": "projection_y_coordinate", "units": "m", "axis": "Y"}),
})
# Only swap dimensions that are not already called x / y.
dims_to_swap = {old: new for old, new in ((col_dim, "x"), (row_dim, "y")) if old != new}
if dims_to_swap:
    ds = ds.swap_dims(dims_to_swap)

# ==== CREATE LAT/LON 2D COORDINATES ====
xx, yy = np.meshgrid(x, y)
transformer = Transformer.from_crs(CRS.from_proj4(proj4), CRS.from_epsg(4326), always_xy=True)
lon, lat = transformer.transform(xx, yy)
ds = ds.assign_coords({
    "lon": (("y", "x"), lon, {"standard_name": "longitude", "units": "degrees_east"}),
    "lat": (("y", "x"), lat, {"standard_name": "latitude", "units": "degrees_north"}),
})

# ==== ADD A CF-COMPLIANT CRS VARIABLE ====
# Same parameters as proj4 above, expressed as CF grid-mapping attributes.
ds["crs"] = xr.Variable((), 0)
ds["crs"].attrs = {
    "grid_mapping_name": "lambert_conformal_conic",
    "earth_radius": earth_radius,
    "standard_parallel": [lat_1, lat_2],
    "longitude_of_central_meridian": lon_0,
    "latitude_of_projection_origin": lat_0,
    "false_easting": 0.0,
    "false_northing": 0.0,
    "units": "m",
}

# Point the data variables at the grid-mapping variable
# (their dims are already x/y after swap_dims).
for v in ["PM25_AVG", "O3_MDA8"]:
    if v in ds.data_vars:
        ds[v].attrs["grid_mapping"] = "crs"
    else:
        print(f"WARNING: variable {v} not found; skipping")

# (Optional) keep only the pieces you care about
keep = [v for v in ["PM25_AVG", "O3_MDA8", "lat", "lon", "crs"] if v in ds]
ds_out = ds[keep] if keep else ds

# ==== SAVE ====
ds_out.to_netcdf(out_nc)
print("Wrote", out_nc)


Wrote /glade/derecho/scratch/boehnert/AQE/EQUATE/HR2DAY2_EQUATES_summer_2016_avg.nc


STEP 2 - Convert to a geotiff

In [34]:
#Step2 convert to geotif
import xarray as xr
import rasterio
from rasterio.transform import from_origin

# STEP 2: write the O3_MDA8 field of the STEP 1 netCDF to a single-band GeoTIFF.

# The file written in STEP 1: it carries x/y coordinates and the "crs" variable.
ds = xr.open_dataset(r"/glade/derecho/scratch/boehnert/AQE/EQUATE/HR2DAY2_EQUATES_summer_2016_avg.nc")
data = ds["O3_MDA8"].isel(TSTEP=0, LAY=0)

# ==== Orient the array north-up ====
# A GeoTIFF written with from_origin() stores rows north -> south and columns
# west -> east. Make that explicit instead of assuming the array is (y, x)
# with ascending y: put the dims in (y, x) order, then sort x ascending and
# y descending.
data = data.transpose("y", "x")
data_flipped = data.sortby("x").sortby("y", ascending=False).values

# ==== x/y coordinates ====
x = data["x"].values
y = data["y"].values

# Cell size as a positive number, whatever the coordinate order.
dx = float(abs(x[1] - x[0]))
dy = float(abs(y[1] - y[0]))

# ==== Transform: origin at upper-left (northernmost row) ====
# Half a cell is subtracted/added to move from the outermost x/y values to the
# outer raster edge. NOTE(review): this assumes x/y are cell CENTRES - confirm
# against how STEP 1 builds them.
transform = from_origin(float(x.min()) - dx / 2, float(y.max()) + dy / 2, dx, dy)

# ==== CRS (proj string built from the "crs" variable in ds) ====
# Reading the grid-mapping attributes keeps the GeoTIFF CRS identical to the
# one stored in the netCDF rather than duplicating a hand-typed string.
gm = ds["crs"].attrs
std_par = gm["standard_parallel"]
try:
    parallels = [float(p) for p in std_par]
except TypeError:
    # A scalar means a single standard parallel.
    parallels = [float(std_par)]
radius = float(gm["earth_radius"])
crs = (
    f"+proj=lcc +a={radius} +b={radius} "
    f"+lat_1={parallels[0]} +lat_2={parallels[-1]} "
    f"+lat_0={float(gm['latitude_of_projection_origin'])} "
    f"+lon_0={float(gm['longitude_of_central_meridian'])} "
    f"+x_0={float(gm['false_easting'])} +y_0={float(gm['false_northing'])} "
    "+units=m +no_defs"
)

# ==== Write GeoTIFF ====
#  Change location
out_tif = r"/glade/derecho/scratch/boehnert/AQE/rasters/HR2DAY5_equate_summer_MDA8_2016.tif"

with rasterio.open(
    out_tif,
    "w",
    driver="GTiff",
    height=data_flipped.shape[0],
    width=data_flipped.shape[1],
    count=1,
    dtype=data_flipped.dtype,
    crs=crs,
    transform=transform,
) as dst:
    dst.write(data_flipped, 1)









STEP 3 - Reproject the GeoTIFF to WGS84 (EPSG:4326)

In [30]:
#reproject raster to WGS84
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

# STEP 3: reproject the GeoTIFF from STEP 2 to geographic WGS84.

# Input must be the file STEP 2 writes (its out_tif). The previous path
# omitted "summer_", so a fresh run read a stale or missing file.
src_path = r"/glade/derecho/scratch/boehnert/AQE/rasters/HR2DAY5_equate_summer_MDA8_2016.tif"
dst_path = r"/glade/derecho/scratch/boehnert/AQE/rasters/HR2DAY_wgs84equate_MDA8_2016.tif"

with rasterio.open(src_path) as src:
    # Target CRS: WGS84 (EPSG:4326)
    dst_crs = "EPSG:4326"

    # Output grid (affine transform, width, height) covering the source bounds
    transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds
    )

    # Start from the source metadata and override only the georeferencing
    kwargs = src.meta.copy()
    kwargs.update({
        "crs": dst_crs,
        "transform": transform,
        "width": width,
        "height": height
    })

    # Create the output file and reproject band 1 into it
    with rasterio.open(dst_path, "w", **kwargs) as dst:
        reproject(
            source=rasterio.band(src, 1),
            destination=rasterio.band(dst, 1),
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=transform,
            dst_crs=dst_crs,
            resampling=Resampling.nearest  # or Resampling.bilinear
        )

print("✅ Reprojected GeoTIFF written to:", dst_path)


✅ Reprojected GeoTIFF written to: /glade/derecho/scratch/boehnert/AQE/rasters/HR2DAY_wgs84equate_MDA8_2016.tif


STEP 4 - Perform zonal stats to aggregate to tracts and counties

In [25]:
# ==== set up libraries ====
import geopandas as gpd
from exactextract import exact_extract
import rasterio
import pandas as pd
from shapely.geometry import mapping

# ==== Input Shapefiles to be used in aggregation - change to your locations ====
# Both paths are read with gpd.read_file() in the aggregation cells below,
# which export the FIPS and SQKM columns from these files.
# Polygon layer used for the tract-level aggregations
shapeTract = r"/glade/derecho/scratch/boehnert/AQE/shapefile/Colorado_tracts.shp"
# Polygon layer used for the county-level aggregations
shapeCounty = r"/glade/derecho/scratch/boehnert/AQE/shapefile/Colorado_Counties.shp"

In [31]:
# ==== MDA8 (EQUATES, WGS84 raster from STEP 3) - TRACTS aggregation ====

# ==== Load shapefile ====
tracts = gpd.read_file(shapeTract)

# ==== GeoTIFF location - change to your location ====
rast = r"/glade/derecho/scratch/boehnert/AQE/rasters/HR2DAY_wgs84equate_MDA8_2016.tif"
# ==== Output CSV - change to your location ====
out = r"/glade/derecho/scratch/boehnert/AQE/output/equate_tract_MDA8_2016.csv"

# ==== Match the polygon CRS to the raster CRS ====
# The raster is opened only to read its CRS. Reproject explicitly rather than
# relying on exact_extract to reconcile layers in different coordinate systems.
with rasterio.open(rast) as src:
    raster_crs = src.crs
if raster_crs is None or tracts.crs is None:
    print("WARNING: raster or shapefile has no CRS; polygons were not reprojected")
else:
    tracts = tracts.to_crs(raster_crs.to_wkt())

# ==== Zonal statistics: one pass over the raster for all four statistics ====
stats = exact_extract(rast, tracts, ["count", "mean", "median", "stdev"], output="pandas")
# Rename by position (same order as the operations requested above).
stats.columns = ["COUNT", "MEAN", "MEDIAN", "STD"]

# Check length
print(len(stats))  # Should be 1447

# ==== Assign and export ====
# Copy values positionally; exact_extract returns one row per input feature.
for col in stats.columns:
    tracts[col] = stats[col].to_numpy()

# Export desired fields
tracts[['FIPS', 'SQKM', 'COUNT', 'MEAN', 'MEDIAN', 'STD']].to_csv(out, index=False)

1447


In [4]:
# ====  MDA8 - Aggregate to TRACTS ====

# ==== Load shapefile - change to your location ====
tracts = gpd.read_file(shapeTract)

# ====  Input Raster - change to your location ====
rast = r"/glade/derecho/scratch/boehnert/AQE/rasters/sip_mda8_2016.tif"

# ====  Output CSV - change to your location ====
out = r"/glade/derecho/scratch/boehnert/AQE/output/sip_tract_mda8_2016.csv"

# ====  Put the polygons in the raster's CRS ====
# Only the CRS is needed from the opened raster; exact_extract reads the
# pixel values itself from the path.
with rasterio.open(rast) as src:
    raster_crs = src.crs
if raster_crs is not None and tracts.crs is not None:
    tracts = tracts.to_crs(raster_crs.to_wkt())
else:
    print("WARNING: raster or shapefile has no CRS; polygons were not reprojected")

# ====  Zonal statistics (single call instead of four raster passes) ====
stat_ops = ["count", "mean", "median", "stdev"]
stats = exact_extract(rast, tracts, stat_ops, output="pandas")
# Output columns follow the order of stat_ops; give them the export names.
stats.columns = ["COUNT", "MEAN", "MEDIAN", "STD"]

# Check length
print(len(stats))  # Should be 1447

# Assign (row order matches the input features) and export
for col in stats.columns:
    tracts[col] = stats[col].to_numpy()

# Export desired fields
tracts[['FIPS', 'SQKM', 'COUNT', 'MEAN', 'MEDIAN', 'STD']].to_csv(out, index=False)

1447


In [5]:
# ====  FPRM - Aggregate to TRACTS ====

# ==== Load shapefile - change to your location ====
tracts = gpd.read_file(shapeTract)

# ====  Input Raster - change to your location ====
rast = r"/glade/derecho/scratch/boehnert/AQE/rasters/sip_fprm_2016.tif"

# ====  Output CSV - change to your location ====
out = r"/glade/derecho/scratch/boehnert/AQE/output/sip_tract_fprm_2016.csv"

# ====  Reproject the tracts to the raster's coordinate system ====
# The raster handle is used for its CRS only.
with rasterio.open(rast) as src:
    raster_crs = src.crs
if raster_crs is None or tracts.crs is None:
    print("WARNING: raster or shapefile has no CRS; polygons were not reprojected")
else:
    tracts = tracts.to_crs(raster_crs.to_wkt())

# ====  Zonal statistics ====
# All four statistics come from one exact_extract call, so the raster is
# traversed once rather than four times.
stats = exact_extract(rast, tracts, ["count", "mean", "median", "stdev"], output="pandas")
stats.columns = ["COUNT", "MEAN", "MEDIAN", "STD"]  # positional rename, same order as requested

# Check length
print(len(stats))  # Should be 1447

# Assign (one result row per input feature, in input order) and export
for col in stats.columns:
    tracts[col] = stats[col].to_numpy()

# Export desired fields
tracts[['FIPS', 'SQKM', 'COUNT', 'MEAN', 'MEDIAN', 'STD']].to_csv(out, index=False)

1447


In [7]:
# ====  MDA8 - Aggregate to COUNTIES ====

# ==== Load shapefile - change to your location ====
# Named `counties` (was `tracts`) because this layer holds county polygons.
counties = gpd.read_file(shapeCounty)

# ==== Input raster - change to your location ====
rast = r"/glade/derecho/scratch/boehnert/AQE/rasters/sip_mda8_2016.tif"

# ==== Output CSV - change to your location ====
# Filename corrected from "sip_country_..." to "sip_county_...", matching the
# naming of the county FPRM output.
out = r"/glade/derecho/scratch/boehnert/AQE/output/sip_county_mda8_2016.csv"

# ==== Put the county polygons in the raster's CRS ====
# The MDA8 raster is opened only to read its CRS.
with rasterio.open(rast) as src:
    raster_crs = src.crs
if raster_crs is None or counties.crs is None:
    print("WARNING: raster or shapefile has no CRS; polygons were not reprojected")
else:
    counties = counties.to_crs(raster_crs.to_wkt())

# ==== Zonal statistics: all four statistics in a single raster pass ====
stats = exact_extract(rast, counties, ["count", "mean", "median", "stdev"], output="pandas")
# Rename by position (same order as the operations requested above).
stats.columns = ["COUNT", "MEAN", "MEDIAN", "STD"]

# Check length
print(len(stats))  # Should be 64 (one row per county)

# Assign (one result row per input feature, in input order) and export
for col in stats.columns:
    counties[col] = stats[col].to_numpy()

# Export desired fields
counties[['FIPS', 'SQKM', 'COUNT', 'MEAN', 'MEDIAN', 'STD']].to_csv(out, index=False)


64


In [9]:
# ====  FPRM - Aggregate to COUNTIES ====

# ==== Load shapefile - change to your location ====
# Named `counties` (was `tracts`) because this layer holds county polygons.
counties = gpd.read_file(shapeCounty)

# ==== Input raster - change to your location ====
rast = r"/glade/derecho/scratch/boehnert/AQE/rasters/sip_fprm_2016.tif"

# ==== Output CSV - change to your location ====
out = r"/glade/derecho/scratch/boehnert/AQE/output/sip_county_fprm_2016.csv"

# ==== Reproject the counties to the raster's coordinate system ====
# The FPRM raster is opened for its CRS only; exact_extract reads the pixel
# values from the path.
with rasterio.open(rast) as src:
    raster_crs = src.crs
if raster_crs is not None and counties.crs is not None:
    counties = counties.to_crs(raster_crs.to_wkt())
else:
    print("WARNING: raster or shapefile has no CRS; polygons were not reprojected")

# ==== Zonal statistics (one call, one raster pass) ====
stat_ops = ["count", "mean", "median", "stdev"]
stats = exact_extract(rast, counties, stat_ops, output="pandas")
# Output columns follow the order of stat_ops; give them the export names.
stats.columns = ["COUNT", "MEAN", "MEDIAN", "STD"]

# Check length
print(len(stats))  # Should be 64 (one row per county)

# Assign (row order matches the input features) and export
for col in stats.columns:
    counties[col] = stats[col].to_numpy()

# Export desired fields
counties[['FIPS', 'SQKM', 'COUNT', 'MEAN', 'MEDIAN', 'STD']].to_csv(out, index=False)

64


Exception: Unhandled feature datatype

DriverError: Failed to read GeoJSON data