## StreamStats API Scraper Automatic

__Description__: Tool to automatically run the [USGS StreamStats tool](https://www.usgs.gov/mission-areas/water-resources/science/streamstats-streamflow-statistics-and-spatial-analysis-tools?qt-science_center_objects=0#qt-science_center_objects) for multiple points within a catchment and return the flow frequency curves and subcatchment boundaries.

__Input__: A shapefile containing the latitude and longitude of points on the stream grid for the specified state (confluence and main stem locations).

__Output__: GeoJSON file containing the delineated catchment boundary and flow frequency data for each point, as well as a CSV file containing the flow frequency data.

*Authors*: sputnam@Dewberry.com & slawler@Dewberry.com

### Load libraries and Python options:

In [21]:
import os
import re
import sys
import json
sys.path.append(r'../Core')
from StreamStats_API_Scraper import *
import geopandas as gpd
from geojson import dump

### Specify the state abbreviation and location of the shapefile: 

##### Specify:

In [22]:
# The state abbreviation in uppercase
state = 'NY'

# Folder and file name of the shapefile containing the lat/lon of points on
# the stream grid, and the attribute field that holds each point's ID
path = r'P:\02\NY\R2_BLE_Discovery\TECHNICAL\Tioga\HYDROLOGY\streamstats'
name = 'Confluences.shp'  # The name of the shapefile
id_field = 'ID_Num'

# Specify a consistent coordinate reference system
use_epsg = '26918'  # '4326'

# Location to save the StreamStats results for each polygon.
# makedirs(exist_ok=True) also creates any missing parent folders and does
# nothing when the folder is already there, so the cell is safe to re-run.
allresults = os.path.join(path, 'AllStreamStats')
os.makedirs(allresults, exist_ok=True)

##### Load the shapefile:

In [23]:
# Read the shapefile as a geopandas dataframe
gdf = gpd.read_file(os.path.join(path, name))

# Transform the coordinate reference system of the geodataframe.
# The EPSG code is passed directly because the {'init': 'epsg:...'} dict
# form is deprecated by pyproj and emits warnings on current versions.
gdf = gdf.to_crs(epsg=int(use_epsg))

In [24]:
# Re-index the points by their ID values so that the cells below can look up
# rows by ID label (e.g. gdf.geometry[i] while looping over gdf.index.values)
gdf.index = gdf[id_field].values

##### Initialize parameters

In [25]:
# Catchment polygons (catchment boundaries), keyed by point ID
polyg = {}

# Outlet flow frequency data dictionaries, keyed by point ID
ffdata = {}

# Print progress messages while scraping
print_status = True

# Flow frequency data is requested for every state except 'WI'
get_flow = state != 'WI'

# Failure bookkeeping: list of failed IDs and a per-ID failure counter
errors = [0]
count = dict.fromkeys(gdf[id_field], 0)
keys = count.keys()

### Run the API tool for each point:

In [None]:
# Run StreamStats for every point in the geodataframe. A failure on one point
# is recorded in `errors` (and tallied in `count`) instead of stopping the
# run, so the remaining points are still processed.
#
# Disabled retry loop, kept for reference:
# while errors != [] and [count[key] > 3 for key in keys]:
errors = []
for i in gdf.index.values:
    # Confluence number of this point; looked up once so the error handler
    # below can report it even when the failure happens early in the body
    ID_Num = gdf[id_field][i]
    try:
        # Longitude and latitude for each shapely point
        lon, lat = gdf.geometry[i].x, gdf.geometry[i].y
        if print_status:
            print("Lat/Lon/Confluence:", lat, lon, ID_Num)

        # Run the SS_scrape function. Option: set status=False to hide
        # print statements
        polyg[ID_Num], ff_json = SS_scrape(state, lon, lat,
                                           use_epsg, print_status)
        if get_flow:
            # Extract the peak flow data from the returned json and attach
            # it to the first feature of the catchment polygon
            ffdata[ID_Num] = get_peaks(ff_json)
            polyg[ID_Num]['features'][0]['ffcurve'] = ffdata[ID_Num]

        # Save the result for this point as its own GeoJSON file
        gjson_name = f'StreamStats_Polygons_{int(ID_Num)}.geojson'
        with open(os.path.join(allresults, gjson_name), 'w') as f:
            dump(polyg[ID_Num], f)
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit can still stop a long run, and
        # report the reason instead of discarding it.
        print('could not process data {}'.format(ID_Num))
        print('  reason: {!r}'.format(exc))
        errors.append(ID_Num)
        count[ID_Num] += 1

Lat/Lon/Confluence: 4689364.445002781 274651.35086100595 0.0
Line 28: Expecting value: line 1 column 1 (char 0
while loop: watershed_data count: 1
could not process data 0.0
Lat/Lon/Confluence: 4689324.445002781 274621.35086100595 1.0
Fetched Peak Flows
Lat/Lon/Confluence: 4689394.445002781 274671.35086100595 2.0
Fetched Peak Flows
Lat/Lon/Confluence: 4689364.445002781 274611.35086100595 3.0
Fetched Peak Flows
Lat/Lon/Confluence: 4689164.445002781 274531.35086100595 4.0
Fetched Peak Flows
Lat/Lon/Confluence: 4689064.445002781 274691.35086100595 5.0
Fetched Peak Flows
Lat/Lon/Confluence: 4689064.445002781 274651.35086100595 6.0
Line 28: Expecting value: line 1 column 1 (char 0
while loop: watershed_data count: 1
could not process data 6.0
Lat/Lon/Confluence: 4688884.445002781 274451.35086100595 7.0
Fetched Peak Flows
Lat/Lon/Confluence: 4688854.445002781 274491.35086100595 8.0
Fetched Peak Flows
Lat/Lon/Confluence: 4688644.445002781 274451.35086100595 9.0
Fetched Peak Flows
Lat/Lon/Conf

### Load the results:

In [None]:
# Gather the result files from the per-point results folder, then load them
# with load_results using the chosen EPSG code.
# NOTE(review): gdf2 appears to be a geodataframe of the catchment polygons
# and ffdic the flow frequency data (based on how they are used in the cells
# below) -- confirm against StreamStats_API_Scraper.
files = load_files(allresults)
gdf2, ffdic = load_results(files, use_epsg)

In [None]:
# Remove from gdf (in place) the points whose IDs appear in the loaded
# results, so that only the points without a result remain.
# NOTE(review): assumes every value in gdf2['ID_Num'] is still an index label
# of gdf; re-running this cell after the rows are dropped presumably raises a
# KeyError -- confirm.
gdf.drop(gdf.loc[gdf[id_field][gdf2['ID_Num']]].index, inplace=True)
# Display the IDs of the points that are left
gdf.index.values

###  Save:

##### The flow frequency data as a CSV:

In [None]:
# The flow frequency table is only built and saved when flow data was requested
if get_flow:
    # Construct the summary table for all outlet locations
    ff_df = ff_summary(ffdic)

    # Save the results as a csv
    csv_path = os.path.join(path, 'StreamStats_FlowFrequency.csv')
    ff_df.to_csv(csv_path)

##### The catchment polygons as a Shapefile:

In [None]:
# Pass the geodataframe through convert_attr, then export it as a shapefile
shp_path = os.path.join(path, 'StreamStats_Polygons.shp')
gdf2 = convert_attr(gdf2)
gdf2.to_file(shp_path)

##### The catchment polygons as a geojson:

In [None]:
# Write all catchment polygons to a single GeoJSON file
geojson_path = os.path.join(path, 'StreamStats_Polygons.geojson')
with open(geojson_path, 'w') as f:
    dump(gdf2, f)

# END