### Convert NHDv2 Plus surface water data to available water per technology rasters

In [None]:
import os
import glob

import rasterio
import numpy as np
import pandas as pd
import geopandas as gpd


### Download supporting data

**NHDv2 Plus** data for surface water flow were downloaded from https://ezmt.anl.gov/.  These data include flow rates in gallons per minute (gpm) for each flowline segment.

**GRIDCERF package** data can be downloaded from https://doi.org/10.5281/zenodo.6601790

Vernon, C. R., Nelson, K., Mongird, K., & Rice, J. S. (2022). GRIDCERF: Geospatial Raster Input Data for Capacity Expansion Regional Feasibility (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.6601790


### Set file paths

In [None]:
# folder where the shapefile downloaded from EZMT was placed
nhd_data_directory = '<your directory>'

# NHDv2 Plus surface water flow shapefile inside that folder
nhd_shp_file = os.path.join(
    nhd_data_directory,
    'ez_gis.surface_water_flow_nhdplus_v2_erom_eispc_v2.shp',
)

# GRIDCERF technology specific layer directory
output_dir = '<your directory>/GRIDCERF/Tech_Specific_Layers'

# raster from the GRIDCERF package that serves as the template
template_raster = os.path.join(
    output_dir,
    'cerf_railnodes5km_navwaters5km_conus.tif',
)

# scratch location for the buffered surface flow shapefiles awaiting rasterization
water_shp_dir = '<your directory>'


### Process data

In [None]:
# get target coordinate reference system from template raster;
# the file is opened only long enough to read its CRS and is closed
# automatically when the `with` block exits
with rasterio.open(template_raster) as src:
    target_crs = src.crs


#### Preprocess surface flow shapefile

In [None]:
%%time 

# read the flowlines, retain just the gpm flow and geometry columns,
# and reproject them to the template raster's CRS
gdf = (
    gpd.read_file(nhd_shp_file)
    [['q_gpm', 'geometry']]
    .to_crs(target_crs)
)

# gallons per minute -> million gallons per day (MGD)
gdf['mgd'] = gdf['q_gpm'].div(1000000).mul(60).mul(24)

# the gpm field is no longer needed; add the constant value field
# that is later used when rasterizing
gdf = gdf.drop(columns=['q_gpm']).assign(value=0)

# summary statistics of the converted flow
gdf['mgd'].describe()


#### Bins for minimum mean annual flow requirements where the key is the target file name and the value is the threshold in MGD


In [None]:
# target file name (without extension) -> minimum mean annual flow threshold in MGD,
# listed from the largest threshold to the smallest
bins = {
    f"cerf_nhd2plus_surfaceflow_greaterthan{mgd}mgd_buffer20km": mgd
    for mgd in (145, 120, 110, 95, 75, 70, 55, 40, 35, 25, 10, 2)
}


#### Create a buffered shapefile of flowlines matching the flow requirement

In [None]:
%%time

# make sure the temporary shapefile directory exists before writing into it
os.makedirs(water_shp_dir, exist_ok=True)

# each entry maps an output file name (without extension) to its
# minimum flow threshold in MGD
for layer_name, min_mgd in bins.items():

    print(f"Processing:  {layer_name}")

    # extract the flowlines whose flow strictly exceeds the minimum flow requirement;
    # copy so the geometry replacement below does not touch `gdf`
    gdx = gdf.loc[gdf['mgd'] > min_mgd].copy()

    # buffer by 20 km (20000 meters)
    # NOTE(review): assumes the target CRS uses meters as its linear unit - confirm
    gdx['geometry'] = gdx.buffer(20000)

    # construct output file path
    output_shp = os.path.join(water_shp_dir, f"{layer_name}.shp")

    # write output shapefile containing only the raster value and the buffered geometry
    gdx[['value', 'geometry']].to_file(output_shp)



#### Rasterize each buffered shapefile to match GRIDCERF's spatial requirements

In [None]:
%%time 

import subprocess

# build a list of buffered shapefiles in the temporary directory
shp_list = glob.glob(os.path.join(water_shp_dir, 'cerf_nhd2plus_surfaceflow_greaterthan*mgd_buffer20km.shp'))

for shp_path in shp_list:

    # the shapefile name without its extension is used both as the layer name
    # passed to GDAL and as the output raster's file name
    base_noext = os.path.splitext(os.path.basename(shp_path))[0]

    # construct the output raster name
    output_raster = os.path.join(output_dir, f"{base_noext}.tif")

    # construct the GDAL raster command as an argument list rather than a shell
    # string so that paths containing spaces or shell metacharacters are passed
    # through unmodified
    gdal_rasterize_cmd = [
        "gdal_rasterize",
        "-l", base_noext,            # layer to read from the shapefile
        "-a", "value",               # attribute field holding the burn value
        "-tr", "1000.0", "1000.0",   # target resolution
        "-init", "1.0",              # value the raster is initialized with before burning
        "-te", "-2405552.8355", "-1389065.2005", "2287447.1645", "1609934.7995",  # target extent
        "-ot", "Int16",
        "-of", "GTiff",
        shp_path,
        output_raster,
    ]

    # execute the GDAL command; check=True raises CalledProcessError on a
    # non-zero exit status instead of silently continuing to the next file
    subprocess.run(gdal_rasterize_cmd, check=True)
