### Prepare basin GRU (hydrologic unit or HUC) and flowline shapefiles ###

#### If these don't both pre-exist, run this script before all other scripts ####

This script includes:<br>
1a. if needed, extract the basin GRU shapefile from a large-domain GRU shapefile.<br>
1b. write the basin hucId.txt list.<br>
2. if needed, extract the basin flowlines shapefile from a large-domain flowlines shapefile.<br>
3. reproject the basin GRU and flowlines shapefiles to a common coordinate system (the EPSG code set in the control file).<br>
The script also makes some directories used in the discretization process.

In [None]:
# import libraries
import os, sys
sys.path.append('../')
import functions.geospatial_analysis as ga
import functions.utils as ut
import geopandas as gpd
import rasterio as rio
from rasterio.warp import Resampling
import functions.ogr2ogr as ogr2ogr
import numpy as np

#### Set up paths, filenames, directories ####

In [None]:
# Locations shared by the whole workflow; everything comes from the control file.
# source_path must already exist and hold the input full-domain data.
control_file = '../control.tpl.txt'
source_path, basin_data_path, basin_name = (
    ut.read_from_control(control_file, setting)
    for setting in ('source_path', 'basin_data_path', 'basin_name')
)

In [None]:
# make standard directories: the basin data root plus its plots/ and gis/ subfolders
plot_path = os.path.join(basin_data_path, 'plots/')
gis_path  = os.path.join(basin_data_path, 'gis/')
for new_dir in (basin_data_path, plot_path, gis_path):
    # exist_ok=True replaces the separate os.path.exists() check, so there is
    # no window between "check" and "create" and no error if the folder is already there
    os.makedirs(new_dir, exist_ok=True)

In [None]:
# projection system
# new_epsg: EPSG code read from the control file; it is passed to
#           ga.reproject_vector() below when reprojecting the GRU and flowline shapefiles.
# dest_crs: rasterio CRS object built from that same code; it is not referenced
#           again in the visible part of this script.
new_epsg = ut.read_from_control(control_file, 'epsg') 
dest_crs = rio.crs.CRS.from_epsg(new_epsg)

In [None]:
# Basin shapefile names from the control file; either file may already exist.
# The flowlines name always refers to the projected (_prj) file.
basin_gru_shp = ut.read_from_control(control_file, 'basin_gru_shp')
basin_flowlines_shp = ut.set_filename(control_file, 'basin_flowlines_shp')

# Reprojected GRU shapefile: same name with '_prj' inserted before the extension.
basin_gru_prj_shp = '{}_prj.shp'.format(basin_gru_shp.split('.shp')[0])

# Attribute holding the HUC id, and the text file that will list the basin's ids.
huc_fieldname = ut.read_from_control(control_file, 'huc_fieldname')
basin_hucId_txt = ut.set_filename(control_file, 'basin_hucId_txt')

In [None]:
# GRU field definitions read from the control file.
# The original author marked these "used?"; nothing in the visible part of
# this script reads them.
gruNo_fieldname, gruNo_field_dtype, gruId_fieldname = (
    ut.read_from_control(control_file, setting)
    for setting in ('gruNo_fieldname', 'gruNo_field_dtype', 'gruId_fieldname')
)

#### Set basin GRU shapefile (extract from larger full-domain if needed) ####

In [None]:
# If the basin shapefile doesn't exist, extract it from a larger full-domain HUC/GRU shapefile:
# starting at the outlet HUC, repeatedly collect every HUC whose "to HUC" field points at a
# HUC already collected, then write the selected polygons to basin_gru_shp.
if not os.path.exists(basin_gru_shp):

    # ---- extract basin GRU shapefile and ID list from a larger full-domain GRU / HUC shapefile ----

    # read filename and other necessary info
    fulldom_huc_shp   = ut.read_from_control(control_file, 'fulldom_huc_shpfile')
    outlet_hucId      = ut.read_from_control(control_file, 'outlet_hucId')
    toHuc_fieldname   = ut.read_from_control(control_file, 'toHuc_fieldname')
    data = gpd.read_file(fulldom_huc_shp)

    # both the HUC id column and the downstream ("to") HUC id column are required.
    # sys.exit() is used instead of the interactive exit() helper, which is not
    # guaranteed to exist and behaves differently inside notebooks.
    for fieldname in (huc_fieldname, toHuc_fieldname):
        if fieldname not in data.columns.values:
            sys.exit(fieldname + ' column does not exist in shapefile.')
    hucs   = data[huc_fieldname].values
    tohucs = data[toHuc_fieldname].values

    # extract only the useful columns to save data memory.
    data = data[[huc_fieldname, toHuc_fieldname, 'geometry']]

    # ---- search upstream HUCs ----
    # method 1: search upstream hucs based on the most downstream hucId
    # NOTE(review): outlet_hucId is compared directly with the shapefile values, so it
    # must have the same type as the huc columns (e.g. both strings) -- confirm what
    # ut.read_from_control returns.
    upstream_hucs = [outlet_hucId]                                      # initiate with the outlet huc
    huc_found     = np.unique(hucs[np.where(tohucs == outlet_hucId)])   # hucs draining directly to the outlet
    upstream_hucs.extend(list(huc_found))
    visited       = set(upstream_hucs)                                  # O(1) "already collected?" lookups
    round_num     = 0                                                   # record the round number of searching.

    while len(huc_found) != 0:  # keep searching until a round finds no new upstream hucs
        round_num = round_num + 1
        print("Round %d: %d HUCs found." % (round_num, len(upstream_hucs)))

        # collect the hucs draining to any huc found in the previous round
        huc_found_next = []
        for huc_i in huc_found:
            huc_found_next.extend(list(hucs[np.where(tohucs == huc_i)]))
        huc_found_next = np.unique(huc_found_next)

        # keep only hucs not collected before (this also guards against loops in the network)
        huc_found = [huc for huc in huc_found_next if huc not in visited]
        upstream_hucs.extend(huc_found)
        visited.update(huc_found)

    # alternate method: manually add upstream_hucs when the list of upstream hucs is known.
    #upstream_hucs= np.loadtxt('/glade/u/home/andywood/proj/SHARP/wreg/bighorn/prep/lists/hucIds.06279940.txt',dtype=int)

    # ---- save upstream GRU shapefile ----
    data[data[huc_fieldname].isin(upstream_hucs)].to_file(basin_gru_shp)

In [None]:
# read the basin shapefile and write hucId list (one id per line)
data = gpd.read_file(basin_gru_shp)
if huc_fieldname not in data.columns.values:
    # single message string: exit() accepts only one argument
    sys.exit(huc_fieldname + ' column does not exist in shapefile ' + basin_gru_shp)
hucs = data[huc_fieldname].values

# integer ids are written as plain integers, any other dtype as strings
if 'int' in str(hucs.dtype):
    np.savetxt(basin_hucId_txt, hucs, fmt='%d')
else:
    np.savetxt(basin_hucId_txt, hucs, fmt='%s')
print('wrote hucId file for the target basin %s: %s' % (basin_name, basin_hucId_txt))

In [None]:
# if needed, reproject basin GRU shapefile to the EPSG code from the control file
if not os.path.exists(basin_gru_prj_shp):
    ga.reproject_vector(basin_gru_shp, basin_gru_prj_shp, new_epsg)
    print('reprojected basin GRUs:', basin_gru_prj_shp)
else:
    # report honestly when the projected file was already there and nothing was redone
    print('using existing reprojected basin GRUs:', basin_gru_prj_shp)

# Alternative method: use ogr2ogr
#if not os.path.exists(basin_gru_prj_shp):
#    ga.reproject_basin_shapefile(basin_gru_shp, basin_gru_prj_shp, dest_crs)
#in_gdf_prj = gpd.read_file(basin_gru_prj_shp)    # read projected file in using geopandas

#### Extract basin flowline shapefile ####

In [None]:
# -- extract basin flowlines from full-dom flowlines file if needed
# Steps: reproject the full-domain flowlines (once), dissolve the projected basin GRUs
# into a single boundary polygon, then clip the flowlines with that boundary and
# write the result to basin_flowlines_shp (which is already the projected filename).
if not os.path.exists(basin_flowlines_shp):

    # may need to reproject full-domain flowlines shapefile first
    flowlines_shp     = ut.read_from_control(control_file, 'fulldom_flowlines_shp')
    flowlines_prj_shp = flowlines_shp.split('.shp')[0]+'_prj.shp'
    if not os.path.exists(flowlines_prj_shp):
        ga.reproject_vector(flowlines_shp, flowlines_prj_shp, new_epsg)
        print('reprojected full domain streams:', flowlines_prj_shp)

    # read stream and boundary files (projected)
    flowlines_gpd = gpd.read_file(flowlines_prj_shp)
    basin_gru_gpd = gpd.read_file(basin_gru_prj_shp)
    print('read reprojected shapefiles for clipping flowlines')

    # create basin outer boundary shapefile:
    # give every GRU the same value in a dummy column and dissolve on it,
    # which merges all GRU polygons into one feature
    basin_gru_gpd['null_column'] = 0
    basin_boundary_gpd     = basin_gru_gpd.dissolve(by='null_column')
    basin_boundary_prj_shp = basin_gru_prj_shp.split('.shp')[0]+'_boundary.shp'
    basin_boundary_gpd.to_file(basin_boundary_prj_shp)
    print('wrote basin boundary shapefile to use in stream clipping:', basin_boundary_prj_shp)

    # clip full-dom reprojected flowlines with basin boundary
    #   note: if geopandas version < 0.7, cannot use clip(), so instead use ogr2ogr
    #   The version is compared as an integer (major, minor) tuple; converting "0.10"
    #   to a float would give 0.1 and wrongly select the ogr2ogr fallback.
    gpd_major, gpd_minor = (int(part) for part in gpd.__version__.split('.')[:2])
    if (gpd_major, gpd_minor) >= (0, 7):
        in_gpd_clip = gpd.clip(flowlines_gpd, basin_boundary_gpd)
        in_gpd_clip.to_file(basin_flowlines_shp)
    else:
        print('Note: using ogr2ogr to clip streams to basin')
        driverName = 'ESRI Shapefile'    # can later be upgraded to work with geopackages (eg 'GPKG')
        ogr2ogr.main(["", "-f", driverName, "-clipsrc", basin_boundary_prj_shp, basin_flowlines_shp, flowlines_prj_shp])
    print('wrote basin-clipped stream shapefile:', basin_flowlines_shp)