### Prepare basin GRU (hydrologic unit or GRU) and flowline shapefiles ###

#### If these don't both pre-exist, run this script before all other scripts ####

This script includes:<br>
1a. if needed, extract basin GRU shapefile from a large-domain GRU shapefile.<br> 
1b. write basin gruId.txt list<br>
2. extract basin flowlines shapefile from a large-domain flowlines shapefile.<br>
3. reproject basin GRU and flowlines shapefiles to a common projected (e.g., equal-area) coordinate system. <br>
The script also makes some directories used in the discretization process.

In [31]:
# import libraries
import os, sys
sys.path.append('../')
import functions.geospatial_analysis as ga
import functions.utils as ut
import geopandas as gpd
import rasterio as rio
from rasterio.warp import Resampling
import functions.ogr2ogr as ogr2ogr
import numpy as np

#### Set up paths, filenames, directories ####

In [32]:
# common paths
# the control file supplies the settings read here: the basin name and the
# root directory for this basin's data
control_file = '../../control/control.txt.tuolumne'
basin_name = ut.read_from_control(control_file, 'basin_name')
basin_data_path = ut.read_from_control(control_file, 'basin_data_path')

In [33]:
# make standard directories: the basin data root plus its plots/ and gis/ subdirectories
plot_path = os.path.join(basin_data_path, 'plots/')
gis_path  = os.path.join(basin_data_path, 'gis/')

# exist_ok=True creates a directory only when it is missing, so no separate
# os.path.exists() check is needed (and there is no window between the check
# and the creation in which another process could create the directory)
for std_dir in (basin_data_path, plot_path, gis_path):
    os.makedirs(std_dir, exist_ok=True)

In [34]:
# projection system
# EPSG code read from the control file; the same value is passed to
# ga.reproject_vector() when the GRU and flowline shapefiles are reprojected
new_epsg = ut.read_from_control(control_file, 'epsg') 
# rasterio CRS object built from that EPSG code
dest_crs = rio.crs.CRS.from_epsg(new_epsg)

In [35]:
# gru fieldname (ID column in the GRU shapefile) and the gruId text file
gru_fieldname = ut.read_from_control(control_file, 'gru_fieldname')
basin_gruId_txt = ut.set_filename(control_file, 'basin_gruId_txt')

# basin shapefiles; either one may already exist on disk
basin_gru_shp = ut.set_filename(control_file, 'basin_gru_shp')
basin_flowlines_shp = ut.set_filename(control_file, 'basin_flowlines_shp')  # is always _prj

# derived filename: the projected copy of the basin GRU shapefile, named by
# replacing everything from the first '.shp' onward with '_prj.shp'
basin_gru_prj_shp = '{}_prj.shp'.format(basin_gru_shp.split('.shp')[0])

#### Set basin GRU shapefile (extract from larger full-domain if needed) ####

In [36]:
# if the basin shapefile doesn't exist, it needs to be extracted from another larger GRU shapefile
if not os.path.exists(basin_gru_shp):

    # ---- extract basin GRU shapefile and ID list from a larger full-domain GRU shapefile ----

    # read filename and other necessary info
    # NOTE(review): this key ends in '_shpfile' while the other control keys read in this
    # script end in '_shp' (e.g. 'fulldom_flowlines_shp') -- confirm against the control file.
    fulldom_gru_shp   = ut.read_from_control(control_file, 'fulldom_gru_shpfile')
    outlet_gruId      = ut.read_from_control(control_file, 'basin_outlet_gruId')
    toGRU_fieldname   = ut.read_from_control(control_file, 'toGRU_fieldname')
    data              = gpd.read_file(fulldom_gru_shp)

    # check whether the two required columns (gru_field, toGRU_field) are in gru_shp;
    # stop with a message if either one is missing.
    for fieldname in (gru_fieldname, toGRU_fieldname):
        if fieldname not in data.columns.values:
            sys.exit(fieldname + ' column does not exist in shapefile.')
    grus   = data[gru_fieldname].values     # ID of every GRU in the full domain
    togrus = data[toGRU_fieldname].values   # ID of the GRU each GRU drains to

    # extract only the useful columns to save data memory.
    data = data[[gru_fieldname, toGRU_fieldname, 'geometry']]

    # NOTE(review): assumes the control file may return the outlet ID as text -- TODO confirm.
    # Convert it to int when the ID column is integer-typed so that the == and isin()
    # comparisons below can match; int() leaves an existing integer unchanged.
    if np.issubdtype(grus.dtype, np.integer):
        outlet_gruId = int(outlet_gruId)

    # ---- search upstream GRUs ----
    # method 1: search upstream grus based on the most downstream gruId,
    # moving one level further upstream per round
    upstream_grus = [outlet_gruId]                            # list of upstream grus, initiated with the outlet
    gru_found     = np.unique(grus[togrus == outlet_gruId])   # all grus that drain directly to the outlet
    upstream_grus.extend(list(gru_found))
    visited       = set(upstream_grus)                        # same IDs as upstream_grus, for fast membership tests
    round_num     = 0                                         # record the round number of searching.

    while len(gru_found) != 0:  # keep searching until a round finds no new upstream grus
        round_num = round_num + 1
        print("Round %d: %d GRUs found." % (round_num, len(upstream_grus)))

        # all grus draining to any gru found in the previous round
        gru_found_next = np.unique(grus[np.isin(togrus, gru_found)])

        # keep only the grus not collected yet, so the search always terminates
        gru_found = [gru for gru in gru_found_next if gru not in visited]
        upstream_grus.extend(gru_found)
        visited.update(gru_found)

    # alternate method: manually add upstream_grus when the list of upstream grus is known.
    #upstream_grus= np.loadtxt('/glade/u/home/andywood/proj/SHARP/wreg/bighorn/prep/lists/gruIds.06279940.txt',dtype=int)

    # ---- save upstream GRU shapefile ----
    data[data[gru_fieldname].isin(upstream_grus)].to_file(basin_gru_shp)

In [37]:
# read the basin shapefile and write gruId list (one ID per line)
data = gpd.read_file(basin_gru_shp)
if gru_fieldname not in data.columns.values:
    # build a single message string: exit() accepts only one argument
    sys.exit(gru_fieldname + ' column does not exist in shapefile ' + basin_gru_shp)
grus = data[gru_fieldname].values

# integer IDs are written as integers; any other dtype is written via its string form
id_fmt = '%d' if 'int' in str(grus.dtype) else '%s'
np.savetxt(basin_gruId_txt, grus, fmt=id_fmt)
print('wrote gruId file for the target basin %s: %s' % (basin_name, basin_gruId_txt))

wrote gruId file for the target basin tuolumne: /glade/work/andywood/complexity/basins/smada/tuolumne/gruIds.txt


In [38]:
# make sure a projected copy of the basin GRU shapefile exists:
# reproject to new_epsg only when the _prj shapefile is not already on disk
prj_shp_exists = os.path.exists(basin_gru_prj_shp)
if not prj_shp_exists:
    ga.reproject_vector(basin_gru_shp, basin_gru_prj_shp, new_epsg)
# this message is printed whether or not the reprojection ran in this session
print('reprojected basin GRUs:', basin_gru_prj_shp)

# Alternative method: use ogr2ogr
#if not os.path.exists(basin_gru_prj_shp):
#    ga.reproject_basin_shapefile(basin_gru_shp, basin_gru_prj_shp, dst_crs)
#in_gdf_prj = gpd.read_file(basin_gru_prj_shp)    # read projected file in using geopandas

reprojected basin GRUs: /glade/work/andywood/complexity/basins/smada/tuolumne/gis/huc12x.tuolumne_prj.shp


#### Extract basin flowline shapefile ####

In [40]:
# -- extract basin flowlines from full-dom flowlines file if it doesn't exist
#    note that the basin flowlines shapefile will be in the common projected coordinates (new_epsg)
#    this step can take a few minutes (wait for 'done')
if not os.path.exists(basin_flowlines_shp):

    # may need to reproject full-domain flowlines shapefile first
    flowlines_shp     = ut.read_from_control(control_file, 'fulldom_flowlines_shp')
    flowlines_prj_shp = flowlines_shp.split('.shp')[0]+'_prj.shp'
    if not os.path.exists(flowlines_prj_shp):
        ga.reproject_vector(flowlines_shp, flowlines_prj_shp, new_epsg)
        print('reprojected full domain streams:', flowlines_prj_shp)

    # read stream and boundary files (projected)
    flowlines_gpd = gpd.read_file(flowlines_prj_shp)
    basin_gru_gpd = gpd.read_file(basin_gru_prj_shp)
    print('read reprojected shapefiles for clipping flowlines')

    # create basin outer boundary shapefile:
    # give every GRU the same value in a temporary column and dissolve on that column
    basin_gru_gpd['tmp_col'] = 0
    basin_boundary_gpd       = basin_gru_gpd.dissolve(by='tmp_col')
    basin_boundary_prj_shp   = basin_gru_prj_shp.split('.shp')[0]+'_boundary.shp'
    basin_boundary_gpd.to_file(basin_boundary_prj_shp)
    print('wrote basin boundary shapefile to use in stream clipping:', basin_boundary_prj_shp)

    # clip full-dom reprojected flowlines with basin boundary
    #   note: if geopandas version < 0.7, cannot use clip(), so instead use ogr2ogr
    #   compare (major, minor) as integers: converting 'major.minor' to a float
    #   would rank e.g. 0.10 (-> 0.1) below 0.7 and wrongly pick the fallback
    gpd_version = tuple(int(part) for part in gpd.__version__.split('.')[:2])
    if gpd_version >= (0, 7):
        in_gpd_clip = gpd.clip(flowlines_gpd, basin_boundary_gpd)
        in_gpd_clip.to_file(basin_flowlines_shp)
    else:
        print('Note: using ogr2ogr to clip streams to basin')
        driverName = 'ESRI Shapefile'    # can later be upgraded to work with geopackages (eg 'GPKG')
        ogr2ogr.main(["", "-f", driverName, "-clipsrc", basin_boundary_prj_shp, basin_flowlines_shp, flowlines_prj_shp])

    print('wrote basin-clipped stream shapefile:', basin_flowlines_shp)
    print('done')

read reprojected shapefiles for clipping flowlines
wrote basin boundary shapefile to use in stream clipping: /glade/work/andywood/complexity/basins/smada/tuolumne/gis/huc12x.tuolumne_prj_boundary.shp
Note: using ogr2ogr to clip streams to basin, writing /glade/work/andywood/complexity/basins/smada/tuolumne/gis/flowlines_prj.shp
wrote basin-clipped stream shapefile: /glade/work/andywood/complexity/basins/smada/tuolumne/gis/flowlines_prj.shp
