## StreamStats API Scraper Automatic

__Description__: Tool to automatically run the [USGS StreamStats tool](https://www.usgs.gov/mission-areas/water-resources/science/streamstats-streamflow-statistics-and-spatial-analysis-tools?qt-science_center_objects=0#qt-science_center_objects) for multiple points within a catchment and return the flow frequency curves and subcatchment boundaries.

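__Input__: A shapefile containing the latitude and longitude of points on the stream grid for the specified state (confluence and main stem locations). Each point must carry an `ID_Num` attribute, which is used to identify the point and to name its output file.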

__Output__: A GeoJSON file containing the delineated catchment boundary and flow frequency data for each point, a CSV file containing the flow frequency data, and a shapefile of the catchment polygons.

*Authors*: sputnam@Dewberry.com & slawler@Dewberry.com

### Load libraries and Python options:

In [1]:
import os
import re
import sys
import json
sys.path.append('../USGStools')
from StreamStats_API_Scraper import*
import geopandas as gpd
from geojson import dump

### Specify the state abbreviation and location of the shapefile: 

##### Specify:

In [2]:
#The state abbreviation in uppercase
state='NY' 

#Folder holding the shapefile with the lat/lon of points on the stream grid.
#NOTE: machine-specific absolute path - edit before running on another computer
path=r'C:\Users\tmiesse\Work\dewberry_stuff\ryans_data\orleans' 
name='confluences.shp' #The name of the shapefile

#Specify a consistent coordinate reference system (EPSG code, kept as a string)
use_epsg='4326' 

#Location to save the StreamStats results for each polygon
allresults=os.path.join(path,'AllStreamStats') 
#exist_ok=True makes this cell safe to re-run and removes the separate
#"check, then create" step; missing parent folders are created as well
os.makedirs(allresults, exist_ok=True)

##### Load the shapefile:

In [3]:
#Read the shapefile (folder and file name come from the configuration cell)
#as a geopandas dataframe
gdf=gpd.read_file(os.path.join(path, name)) 

#Transform the coordinate reference system of the geodataframe. The EPSG code
#is passed directly rather than through the deprecated {'init': 'epsg:...'}
#proj4-style dictionary; use_epsg is stored as a string, hence the int()
gdf=gdf.to_crs(epsg=int(use_epsg)) 

##### Initialize parameters

In [4]:
#Catchment polygons (catchment boundaries), keyed by confluence number
polyg = dict()

#Outlet flow frequency data dictionaries, keyed by confluence number
ffdata = dict()

#Print progress messages while the points are processed
print_status = True

#Flow frequency extraction is enabled for every state except 'WI'
get_flow = (state != 'WI')

#Placeholder entry; the list of failed confluence numbers is rebuilt by the
#processing loop
errors = [0]

### Run the API tool for each point:

In [None]:
#Maximum number of passes over the points that are still failing. Without a
#cap, a point that fails on every attempt keeps the loop running forever.
max_attempts = 5

#Only the first 250 points of the geodataframe are submitted
pending = list(gdf.index.values[:250])

#Confluence numbers that failed during the most recent pass
errors = []
attempt = 0
while pending and attempt < max_attempts:
    attempt += 1
    errors = []
    still_pending = []
    #Only points that have not succeeded yet are (re)submitted, so finished
    #points are not scraped and rewritten again on every pass
    for i in pending:
        #Confluence number, read before the try so the handler can report it
        ID_Num = gdf['ID_Num'][i]
        try:
            #Longitude and latitude for each shapely point
            lon, lat = gdf.geometry[i].x, gdf.geometry[i].y
            if print_status: print("Lat/Lon/Confluence:", lat, lon, ID_Num)

            #Run the SS_scrape function. Option: set print_status=False to hide print statements
            polyg[ID_Num], ff_json = SS_scrape(state, lon, lat, use_epsg, print_status)
            if get_flow:
                #Extract the flow frequency data and attach it to the catchment feature
                ffdata[ID_Num] = get_peaks(ff_json)
                polyg[ID_Num]['features'][0]['ffcurve'] = ffdata[ID_Num]

            #One GeoJSON file per point, named after the integer confluence number
            with open(os.path.join(allresults,'StreamStats_Polygons_{0}.geojson'.format(int(ID_Num))), 'w') as f:
                dump(polyg[ID_Num], f)
        except Exception as exc:
            #Exception (not a bare except) so that interrupting the kernel
            #still stops the run; the reason is printed to aid diagnosis
            print('could not process data {}'.format(ID_Num))
            print('  reason: {!r}'.format(exc))
            errors.append(ID_Num)
            still_pending.append(i)
    pending = still_pending

if errors:
    print('Gave up on {0} point(s) after {1} attempt(s): {2}'.format(len(errors), attempt, errors))

Lat/Lon/Confluence: 43.329240955729645 -77.99522621186907 0.0
Fetched Peak Flows
Lat/Lon/Confluence: 43.3291219703773 -77.99633039437262 1.0
Fetched Peak Flows
could not process data 1.0
Lat/Lon/Confluence: 43.328771969577375 -77.99594316471261 2.0
Fetched Peak Flows
Lat/Lon/Confluence: 43.328259629433404 -78.0051681153075 3.0
Fetched Peak Flows
could not process data 3.0
Lat/Lon/Confluence: 43.328724619639736 -78.00802779532391 4.0
Fetched Peak Flows
could not process data 4.0
Lat/Lon/Confluence: 43.32836493397188 -78.00801002958254 5.0


### Load the results:

In [33]:
#Gather the result files from the per-point results folder (allresults);
#load_files is an imported helper - presumably it lists the GeoJSON files
#written by the processing loop (TODO confirm against StreamStats_API_Scraper)
files=load_files(allresults)
#NOTE: this rebinds gdf. From here on gdf is whatever load_results returns
#(exported below as the catchment polygons), no longer the point geodataframe
#read from the input shapefile. ffdic feeds the flow frequency summary table.
gdf, ffdic=load_results(files, use_epsg)

306 Polygon Files Found


###  Save:

##### The flow frequency data as a CSV:

In [34]:
#The flow frequency table is only built and written when flow data was requested
if get_flow:
    #Construct the summary table for all outlet locations
    ff_df = ff_summary(ffdic)

    #Save the results as a csv next to the input shapefile
    csv_path = os.path.join(path, 'StreamStats_FlowFrequency.csv')
    ff_df.to_csv(csv_path)

               0           1          10         100        101          102  \
RI                                                                             
1.25  275.936540  275.828738  279.523501  160.943822   5.228393   403.182882   
1.5   319.878411  319.743587  324.633006  194.037752   6.443230   482.258700   
2.0   374.407981  374.238640  380.674491  237.028873   8.116094   583.519117   
5.0   519.825485  519.562349  530.266293  354.353897  12.784931   858.916767   
10.0  620.072033  619.743144  633.503848  437.657095  16.205182  1055.023383   

           104       105          106       107  ...         89         9  \
RI                                               ...                        
1.25  1.944492  1.043198   403.556505  0.956855  ...   3.073012  1.831583   
1.5   2.417815  1.293627   482.815746  1.185108  ...   3.813949  2.270821   
2.0   3.084474  1.650115   584.327395  1.510184  ...   4.855562  2.891167   
5.0   4.998262  2.655534   860.462941  2.426273  ...  

##### The catchment polygons as a Shapefile:

In [35]:
#Write the geodataframe to a shapefile in the input folder
shp_path = os.path.join(path, 'StreamStats_Polygons.shp')
gdf.to_file(filename=shp_path)

  with fiona.drivers():


##### The catchment polygons as a geojson:

In [36]:
#Serialise the geodataframe to a single GeoJSON file in the input folder
geojson_path = os.path.join(path, 'StreamStats_Polygons.geojson')
with open(geojson_path, 'w') as out_file:
    dump(gdf, out_file)

# END