# StreamStats API Scraper

#### Development Notebook

In [28]:
import json
import requests

In [29]:
rcode='NY'
xlocation= -74.57267408040926 
ylocation=  44.93991892773472
crs=4326
stats_group=2
configs=2

#### Assign Location & Stats Group (2=Peakflow)

#### URL Strings

In [30]:
waterhsed_url = 'https://streamstats.usgs.gov/streamstatsservices/watershed.geojson?'
PercentOverlay_url = 'https://gis.streamstats.usgs.gov/arcgis/rest/services/nss/regions/MapServer/exts/PercentOverlayRESTSOE/PercentOverlay'
stats_groups_url= f"https://streamstats.usgs.gov/nssservices/statisticgroups/{stats_group}.json?"
scenarios_url = "https://streamstats.usgs.gov/nssservices/scenarios.json?"
parameters_url = r'https://streamstats.usgs.gov/streamstatsservices/parameters.json?'
estimate_url = 'https://streamstats.usgs.gov/nssservices/scenarios/estimate.json?'
Href_url = f"https://streamstats.usgs.gov/nssservices/statisticgroups/{stats_group}"

In [31]:
watershed_params = {'rcode':rcode, 'xlocation': xlocation,'ylocation':ylocation, 
          'crs':crs, 'includeparameters':'true', 'includefeatures':'true', 'simplify':'true'}

r = requests.get(waterhsed_url, watershed_params)
watershed_data = json.loads(r.content.decode())
watershed_data.keys()

dict_keys(['workspaceID', 'featurecollection', 'parameters', 'messages'])

In [32]:
watershed_data['featurecollection'][1]['feature']['features'][0].keys()

dict_keys(['properties', 'type', 'bbox', 'geometry'])

In [34]:
workspaceID = watershed_data['workspaceID']
print(workspaceID)

NY20190315183538484000


### Featureclass From API

In [35]:
featurecollection = watershed_data['featurecollection']
watershed_poly = featurecollection[1]['feature']

#### Percent Overlay Test

In [36]:
PercentOverlay_params = {'geometry': json.dumps(watershed_poly), 'f': 'json'}
r = requests.post(PercentOverlay_url, PercentOverlay_params)
PercentOverlay = r.json()

In [37]:
PercentOverlay

[{'name': 'Bankfull_Regions_1_and_2_SIR2009_5144',
  'code': 'gc1425',
  'percent': 100.0,
  'areasqmeter': 38269345.38311274,
  'maskareasqmeter': 38269345.38311274},
 {'name': '2006_Full_Region_1',
  'code': 'gc1071',
  'percent': 100.0,
  'areasqmeter': 38269345.38311274,
  'maskareasqmeter': 38269345.38311274}]

### Quick Fix, requires testing/refinement

Select group codes by keyowrd, revisit to verify. This method may provide inaccurate results

In [38]:
regressionregion_codes = []
for group in PercentOverlay:
    group_name = group['name']
    if '2006_Full_Region' in group_name:
        regressionregion_codes.append(group['code'])
        
regressionregion_codes
reg_codes = ','.join(regressionregion_codes)
reg_codes

'gc1071'

In [39]:
rr_weight={}
for rr in  PercentOverlay:
    rr_code = rr['code'] 
    if rr_code in regressionregion_codes:
        rr_weight[rr_code] = rr['percent']
        
rr_weight

{'gc1071': 100.0}

In [40]:
stats_groups_url_params = {'region':rcode,'regressionregions':reg_codes}
r = requests.get(stats_groups_url, json=stats_groups_url_params)
stats_groups = r.json() 
stats_groups

{'ID': 2, 'Name': 'Peak-Flow Statistics', 'Code': 'PFS'}

In [41]:
scenarios_url_params = {'region': rcode ,'statisticgroups': stats_group, 'regressionregions':reg_codes, 'configs': 2}
r = requests.get(scenarios_url, data=scenarios_url_params)
scenarios = r.json()

In [42]:
rr_parameter_codes=[]
for rr in scenarios[0]['RegressionRegions']:
    reg_code = rr['Code']
    if reg_code.lower() in reg_codes:
        parameters = rr['Parameters']
        for pp in parameters:
            for k,v in pp.items():
                if k == 'Code':
                    rr_parameter_codes.append(v)
                
rr_parameter_codes = ','.join(list(set(rr_parameter_codes)))
rr_parameter_codes

'STORAGE,PRECIP,LAGFACTOR,DRNAREA,FOREST'

In [43]:
parameters_url

'https://streamstats.usgs.gov/streamstatsservices/parameters.json?'

In [44]:
parameters_params = json.dumps({'rcode': rcode, 'workspaceID':workspaceID, 'includeparameters': rr_parameter_codes})
r = requests.get(parameters_url, json.loads(parameters_params))
pdata = r.json()
pdata

{'parameters': [{'ID': 0,
   'name': 'Drainage Area',
   'description': 'Area that drains to a point on a stream',
   'code': 'DRNAREA',
   'unit': 'square miles',
   'value': 14.8},
  {'ID': 0,
   'name': 'Percent Forest',
   'description': 'Percentage of area covered by forest',
   'code': 'FOREST',
   'unit': 'percent',
   'value': 64.0},
  {'ID': 0,
   'name': 'Lag Factor',
   'description': 'Lag Factor as defined in SIR 2006-5112',
   'code': 'LAGFACTOR',
   'unit': 'dimensionless',
   'value': 0.98},
  {'ID': 0,
   'name': 'Mean Annual Precipitation',
   'description': 'Mean Annual Precipitation',
   'code': 'PRECIP',
   'unit': 'inches',
   'value': 32.4},
  {'ID': 0,
   'name': 'Percent Storage',
   'description': 'Percentage of area of storage (lakes ponds reservoirs wetlands)',
   'code': 'STORAGE',
   'unit': 'percent',
   'value': 12.1}],
 'messages': ['xmlcount:28',
  'From ApFields',
  'count: 28',
  'DB return count: 28',
  'Start Time: Fri Mar 15 18:36:24 2019 Performin

*__Include error check for above cell output__*

In [45]:
use_codes={}
for p in pdata['parameters']:
    print(p['code'], p['value'])
    use_codes[p['code']] = p['value']

DRNAREA 14.8
FOREST 64.0
LAGFACTOR 0.98
PRECIP 32.4
STORAGE 12.1


In [46]:
estimate_params =  {'region': rcode,'statisticgroups':2,'regressionregions':reg_codes,'configs':2}
r = requests.get(estimate_url, data =estimate_params)
estimate = r.json()
#estimate

In [47]:
#use_codes.keys()

In [48]:
est = json.loads(r.content.decode())
for regregion in est[0]['RegressionRegions']:
    for p in regregion['Parameters']:
        if p['Code'] in use_codes.keys():
            p['Value'] = use_codes[p['Code']]
            #print(p['Code'], p['Value'], use_codes[p['Code']] )
#print("updated est")

In [49]:
payload = dict()
payload["Links"] = [{"rel": "self",
        "Href": Href_url,
        "method": "GET"}] 

#payload

In [50]:
for k, v in stats_groups.items():
    payload[k] = v
    
payload['StatisticGroupID'] = stats_group
#payload

In [51]:
rr_list=[]
for rr in est[0]['RegressionRegions']:
    reg_code = rr['Code']
    if reg_code.lower() in reg_codes:
        #print(rr['Code'])
        rr_list.append(rr)
        rr['PercentWeight']=rr_weight[reg_code.lower()]
        
payload['RegressionRegions'] = rr_list

In [52]:
peak_flow_url = f"https://streamstats.usgs.gov/nssservices/scenarios/estimate.json?region={rcode}&statisticgroups={stats_group}&regressionregions={reg_codes}&configs={configs}"
r = requests.post(peak_flow_url, json=json.loads(json.dumps([payload])))
r.json()

[{'StatisticGroupID': 2,
  'RegressionRegions': [{'ID': 410,
    'Name': '2006_Full_Region_1',
    'Code': 'GC1071',
    'PercentWeight': 100.0,
    'Parameters': [{'ID': 9228,
      'Name': 'Drainage Area',
      'Description': 'Area that drains to a point on a stream',
      'Code': 'DRNAREA',
      'UnitType': {'ID': 0, 'Unit': 'square miles', 'Abbr': 'mi^2'},
      'Value': 14.8,
      'Limits': {'Max': 4500.0, 'Min': 0.54}},
     {'ID': 9229,
      'Name': 'Lag Factor',
      'Description': 'Lag Factor as defined in SIR 2006-5112',
      'Code': 'LAGFACTOR',
      'UnitType': {'ID': 0, 'Unit': 'dimensionless', 'Abbr': 'dim'},
      'Value': 0.98,
      'Limits': {'Max': 15.229, 'Min': 0.004}},
     {'ID': 9230,
      'Name': 'Percent Storage',
      'Description': 'Percentage of area of storage (lakes ponds reservoirs wetlands)',
      'Code': 'STORAGE',
      'UnitType': {'ID': 0, 'Unit': 'percent', 'Abbr': '%'},
      'Value': 12.1,
      'Limits': {'Max': 28.92, 'Min': 0.0}},
 

# END