## StreamStats API Scraper Automatic

__Description__: Tool to automatically run the [USGS StreamStats tool](https://www.usgs.gov/mission-areas/water-resources/science/streamstats-streamflow-statistics-and-spatial-analysis-tools?qt-science_center_objects=0#qt-science_center_objects) for multiple points within a catchment and return the flow frequency curves and subcatchment boundaries.

__Input__: A shapefile containing the latitude and longitude of points on the stream grid for the specified state (confluence and main stem locations).

__Output__: GeoJSON file containing the delineated catchment boundary and flow frequency data for each point, as well as a CSV file containing the flow frequency data.

*Authors*: sputnam@Dewberry.com & slawler@Dewberry.com

### Load libraries and Python options:

In [1]:
import os
import re
import sys
import json
sys.path.append('../USGStools')
from StreamStats_API_Scraper import*
import geopandas as gpd
from geojson import dump

### Specify the state abbreviation and location of the shapefile: 

##### Specify:

In [2]:
# Run configuration: everything a user is expected to edit lives in this cell.

# The state abbreviation in uppercase
state = 'NY'

# Folder and file name of the shapefile containing the lat/lon of points on the stream grid
path = r'C:\Users\tmiesse\Work\data\0410305_v2'
name = 'FlowChangeLocations.shp'

# EPSG code (as a string) of the coordinate reference system used throughout
use_epsg = '4326'

# Folder that receives one StreamStats result file per point.
# makedirs(exist_ok=True) creates it when missing and is a no-op when it is
# already there, so the cell can be re-run safely without a separate
# "does it exist?" check.
allresults = os.path.join(path, 'AllStreamStats')
os.makedirs(allresults, exist_ok=True)

##### Load the shapefile:

In [3]:
# Read the point shapefile into a geopandas GeoDataFrame, then reproject it to
# the coordinate reference system selected by `use_epsg`.
# The EPSG code is passed through the `epsg=` keyword rather than the legacy
# {'init': 'epsg:XXXX'} dictionary, which is deprecated by pyproj/geopandas.
gdf = gpd.read_file(os.path.join(path, name))
gdf = gdf.to_crs(epsg=int(use_epsg))

##### Initialize parameters

In [6]:
# Result containers, both keyed by the point's 'Id' value:
#   polyg  -> catchment polygon (catchment boundary) for the point
#   ffdata -> flow frequency data dictionary for the point's outlet
polyg = dict()
ffdata = dict()

# Print progress messages while running
print_status = True

# Flow frequency data is requested for every state except 'WI'
get_flow = (state != 'WI')

# Seeded with a placeholder entry so the list is non-empty before the first pass
errors = [0]

# Per-point failure counter, one zeroed entry for each 'Id' in the shapefile
count = dict.fromkeys(gdf['Id'], 0)
keys = count.keys()

### Run the API tool for each point:

In [None]:

# Query StreamStats for each point and save one GeoJSON file per point,
# retrying failed points a bounded number of times.
#
# Each pass only visits the points that are still pending: a point leaves the
# pending list as soon as its result file has been written, or once it has
# failed `max_attempts` times (tracked in `count`). This guarantees the loop
# terminates and avoids re-querying points that already succeeded.
# After the loop, `errors` holds the Ids that failed during the final pass.
max_attempts = 3  # total failures allowed per point before giving up

# NOTE(review): only the first 100 rows of the shapefile are processed here;
# confirm whether this cap is intentional or should cover every point.
pending = list(gdf.index.values[:100])

while pending:
    errors = []  # Ids that failed during this pass
    retry = []   # row labels to attempt again on the next pass
    for i in pending:
        # Confluence number of the point
        ID_Num = gdf['Id'][i]
        try:
            # Longitude and latitude of the shapely point
            lon, lat = gdf.geometry[i].x, gdf.geometry[i].y
            if print_status: print("Lat/Lon/Confluence:", lat, lon, ID_Num)

            # Run the SS_scrape function. Option: set status=False to hide print statements
            polyg[ID_Num], ff_json = SS_scrape(state, lon, lat, use_epsg, print_status)
            if get_flow:
                # Extract the flow frequency data and attach it to the polygon's first feature
                ffdata[ID_Num] = get_peaks(ff_json)
                polyg[ID_Num]['features'][0]['ffcurve'] = ffdata[ID_Num]

            out_name = 'StreamStats_Polygons_{0}.geojson'.format(int(ID_Num))
            with open(os.path.join(allresults, out_name), 'w') as f:
                dump(polyg[ID_Num], f)
        except Exception as exc:
            # Catch Exception (not a bare except) so that Ctrl-C / kernel
            # interrupt can still stop a long-running scrape.
            print('could not process data {}'.format(ID_Num))
            if print_status: print('  reason: {!r}'.format(exc))
            errors.append(ID_Num)
            count[ID_Num] += 1
            if count[ID_Num] < max_attempts:
                retry.append(i)
    pending = retry

Lat/Lon/Confluence: 44.97612324719089 -74.72386892012858 0
Fetched Peak Flows
Lat/Lon/Confluence: 44.96163800832418 -74.76649199214766 1
Line 28: Expecting value: line 1 column 1 (char 0
while loop: watershed_data count: 1
could not process data 1
Lat/Lon/Confluence: 44.961474264097994 -74.76625647109408 2
Fetched Peak Flows
Lat/Lon/Confluence: 44.94197659512003 -74.79904633952789 3
Fetched Peak Flows
Lat/Lon/Confluence: 44.94198100898731 -74.79872451069143 4
Fetched Peak Flows
Lat/Lon/Confluence: 44.91482376441719 -74.89681044315671 5
Line 28: Expecting value: line 1 column 1 (char 0
while loop: watershed_data count: 1
could not process data 5
Lat/Lon/Confluence: 44.92426035017822 -74.80534433275758 6
Fetched Peak Flows
Lat/Lon/Confluence: 44.924352441363496 -74.8047415331623 7
Fetched Peak Flows
Lat/Lon/Confluence: 44.91903160771314 -74.81292545710593 8
Fetched Peak Flows
Lat/Lon/Confluence: 44.919752689550684 -74.80481008298045 9
Fetched Peak Flows
Lat/Lon/Confluence: 44.91653977656

### Load the results:

In [11]:
# Gather the saved per-point results from the `allresults` folder
# (presumably the StreamStats_Polygons_*.geojson files written by the run loop -- confirm in load_files).
files=load_files(allresults)
# Combine those files into `gdf2` (exported below as the catchment polygons)
# and `ffdic` (passed to ff_summary below to build the flow frequency table).
gdf2, ffdic=load_results(files, use_epsg)

553 Polygon Files Found


###  Save:

##### The flow frequency data as a CSV:

In [7]:
# Only build and save the flow frequency table when flow data was requested
if get_flow:
    # Summary table for all outlet locations
    ff_df = ff_summary(ffdic)

    # Save the results as a csv
    csv_path = os.path.join(path, 'StreamStats_FlowFrequency.csv')
    ff_df.to_csv(csv_path)

               0           1           10          100         101  \
RI                                                                   
1.25   48.881095  130.847849   412.181344   116.206599   45.373520   
1.5    77.784433  162.826717   714.454795   191.959204   54.875430   
2.0   120.172534  202.558998  1182.888321   304.476494   66.577529   
5.0   284.661247  304.449554  3163.029520   742.717882   95.345366   
10.0  439.220450  372.371346  5164.076413  1152.387406  113.641174   

              102          103        104         105         107  ...  \
RI                                                                 ...   
1.25   140.953853   215.050252  25.993971  138.334536  103.450420  ...   
1.5    223.598113   355.164527  31.242808  172.071887  166.772998  ...   
2.0    343.301191   562.896103  37.738107  213.807263  258.916663  ...   
5.0    796.951543  1375.281564  53.625479  322.546562  604.690606  ...   
10.0  1214.960139  2140.088416  63.644832  395.928167  917.639018

##### The catchment polygons as a Shapefile:

In [12]:
# Destination of the shapefile export
shp_path = os.path.join(path, 'StreamStats_Polygons.shp')

# Pass the geodataframe through convert_attr, then export it as a shapefile
gdf2 = convert_attr(gdf2)
gdf2.to_file(filename=shp_path)

##### The catchment polygons as a geojson:

In [13]:
# Write the catchment polygons geodataframe out as a single GeoJSON file
geojson_path = os.path.join(path, 'StreamStats_Polygons.geojson')
with open(geojson_path, 'w') as out_file:
    dump(gdf2, out_file)

# END