# StreamStats API Scraper

#### Development Notebook

In [1]:
import json
import requests

In [2]:
rcode='MD'
xlocation= -76.93629
ylocation=  38.94793
crs=4326
stats_group=2
configs=2

#### Assign Location & Stats Group (2=Peakflow)

#### URL Strings

In [3]:
waterhsed_url = 'https://streamstats.usgs.gov/streamstatsservices/watershed.geojson?'
PercentOverlay_url = 'https://gis.streamstats.usgs.gov/arcgis/rest/services/nss/regions/MapServer/exts/PercentOverlayRESTSOE/PercentOverlay'
stats_groups_url= f"https://streamstats.usgs.gov/nssservices/statisticgroups/{stats_group}.json?"
scenarios_url = "https://streamstats.usgs.gov/nssservices/scenarios.json?"
parameters_url = r'https://streamstats.usgs.gov/streamstatsservices/parameters.json?'
estimate_url = 'https://streamstats.usgs.gov/nssservices/scenarios/estimate.json?'
Href_url = f"https://streamstats.usgs.gov/nssservices/statisticgroups/{stats_group}"

In [4]:
watershed_params = {'rcode':rcode, 'xlocation': xlocation,'ylocation':ylocation, 
          'crs':crs, 'includeparameters':'true', 'includefeatures':'true', 'simplify':'true'}

r = requests.get(waterhsed_url, watershed_params)
watershed_data = json.loads(r.content.decode())
watershed_data.keys()

dict_keys(['workspaceID', 'featurecollection', 'parameters', 'messages'])

In [5]:
workspaceID = watershed_data['workspaceID']
print(workspaceID)

MD20190125231921461000


### Featureclass From API

In [6]:
featurecollection = watershed_data['featurecollection']
watershed_poly = featurecollection[1]['feature']

#### Percent Overlay Test

In [7]:
PercentOverlay_params = {'geometry': json.dumps(watershed_poly), 'f': 'json'}
r = requests.post(PercentOverlay_url, PercentOverlay_params)
PercentOverlay = r.json()

### Quick Fix, requires testing/refinement

Select group codes by keyowrd, revisit to verify. This method may provide inaccurate results

In [8]:
regressionregion_codes = []
for group in PercentOverlay:
    group_name = group['name']
    if 'Peak' in group_name and 'Urban' not in group_name:
        regressionregion_codes.append(group['code'])
        
regressionregion_codes
reg_codes = ','.join(regressionregion_codes)
reg_codes

'gc1479,gc1480'

In [9]:
rr_weight={}
for rr in  PercentOverlay:
    rr_code = rr['code'] 
    if rr_code in regressionregion_codes:
        rr_weight[rr_code] = rr['percent']
        
rr_weight

{'gc1479': 71.15778149903338, 'gc1480': 28.842218502961664}

In [10]:
stats_groups_url_params = {'region':rcode,'regressionregions':reg_codes}
r = requests.get(stats_groups_url, json=stats_groups_url_params)
stats_groups = r.json() 
stats_groups

{'ID': 2, 'Name': 'Peak-Flow Statistics', 'Code': 'PFS'}

In [11]:
scenarios_url_params = {'region': rcode ,'statisticgroups': stats_group, 'regressionregions':reg_codes, 'configs': 2}
r = requests.get(scenarios_url, data=scenarios_url_params)
scenarios = r.json()

In [12]:
rr_parameter_codes=[]
for rr in scenarios[0]['RegressionRegions']:
    reg_code = rr['Code']
    if reg_code.lower() in reg_codes:
        parameters = rr['Parameters']
        for pp in parameters:
            for k,v in pp.items():
                if k == 'Code':
                    rr_parameter_codes.append(v)
                
rr_parameter_codes = ','.join(list(set(rr_parameter_codes)))
rr_parameter_codes

'IMPERV,FOREST_MD,LIME,DRNAREA,SOILCorD'

In [13]:
parameters_url

'https://streamstats.usgs.gov/streamstatsservices/parameters.json?'

In [19]:
parameters_params = json.dumps({'rcode': rcode, 'workspaceID':workspaceID, 'includeparameters': rr_parameter_codes})
r = requests.get(parameters_url, json.loads(parameters_params))
pdata = r.json()
pdata

{'parameters': [{'ID': 0,
   'name': 'Drainage Area',
   'description': 'Area that drains to a point on a stream',
   'code': 'DRNAREA',
   'unit': 'square miles',
   'value': 73.9},
  {'ID': 0,
   'name': 'Percent forest from MD 2010 land use',
   'description': 'Percent forest from Maryland 2010 land-use data',
   'code': 'FOREST_MD',
   'unit': 'percent',
   'value': 28.6},
  {'ID': 0,
   'name': 'Percent Impervious',
   'description': 'Percentage of impervious area',
   'code': 'IMPERV',
   'unit': 'percent',
   'value': 28.6},
  {'ID': 0,
   'name': 'Percent Limestone',
   'description': 'Percentage of area of limestone geology',
   'code': 'LIME',
   'unit': 'percent',
   'value': 0.0},
  {'ID': 0,
   'name': 'Percent SSURGO Soil Type C or D',
   'description': 'Percentage of area of Hydrologic Soil Type C or D from SSURGO',
   'code': 'SOILCorD',
   'unit': 'percent',
   'value': 65.9}],
 'messages': ['xmlcount:14',
  'From ApFields',
  'count: 14',
  'DB return count: 14',
  'S

*__Include error check for above cell output__*

In [20]:
use_codes={}
for p in pdata['parameters']:
    #print(p['code'], p['value'])
    use_codes[p['code']] = p['value']

In [21]:
estimate_params =  {'region': rcode,'statisticgroups':2,'regressionregions':reg_codes,'configs':2}
r = requests.get(estimate_url, data =estimate_params)
estimate = r.json()
#estimate

In [None]:
#use_codes.keys()

In [22]:
est = json.loads(r.content.decode())
for regregion in est[0]['RegressionRegions']:
    for p in regregion['Parameters']:
        if p['Code'] in use_codes.keys():
            p['Value'] = use_codes[p['Code']]
            #print(p['Code'], p['Value'], use_codes[p['Code']] )
#print("updated est")

In [23]:
payload = dict()
payload["Links"] = [{"rel": "self",
        "Href": Href_url,
        "method": "GET"}] 

#payload

In [24]:
for k, v in stats_groups.items():
    payload[k] = v
    
payload['StatisticGroupID'] = stats_group
#payload

In [25]:
rr_list=[]
for rr in est[0]['RegressionRegions']:
    reg_code = rr['Code']
    if reg_code.lower() in reg_codes:
        #print(rr['Code'])
        rr_list.append(rr)
        rr['PercentWeight']=rr_weight[reg_code.lower()]
        
payload['RegressionRegions'] = rr_list

In [26]:
peak_flow_url = f"https://streamstats.usgs.gov/nssservices/scenarios/estimate.json?region={rcode}&statisticgroups={stats_group}&regressionregions={reg_codes}&configs={configs}"
r = requests.post(peak_flow_url, json=json.loads(json.dumps([payload])))
r.json()

[{'StatisticGroupID': 2,
  'RegressionRegions': [{'ID': 256,
    'Name': 'Peak_Western_Coastal_Plain_2010_AHMMD',
    'Code': 'GC1479',
    'PercentWeight': 71.15778149903338,
    'Parameters': [{'ID': 5396,
      'Name': 'Drainage Area',
      'Description': 'Area that drains to a point on a stream',
      'Code': 'DRNAREA',
      'UnitType': {'ID': 0, 'Unit': 'square miles', 'Abbr': 'mi^2'},
      'Value': 73.9,
      'Limits': {'Max': 349.6, 'Min': 0.41}},
     {'ID': 5397,
      'Name': 'Percent SSURGO Soil Type C or D',
      'Description': 'Percentage of area of Hydrologic Soil Type C or D from SSURGO',
      'Code': 'SOILCorD',
      'UnitType': {'ID': 0, 'Unit': 'percent', 'Abbr': '%'},
      'Value': 65.9,
      'Limits': {'Max': 74.7, 'Min': 13.0}},
     {'ID': 5398,
      'Name': 'Percent Impervious',
      'Description': 'Percentage of impervious area',
      'Code': 'IMPERV',
      'UnitType': {'ID': 0, 'Unit': 'percent', 'Abbr': '%'},
      'Value': 28.6,
      'Limits': 

# END