# StreamStats API Scraper

#### Development Notebook

In [1]:
import copy
import json

import requests

In [2]:
# Inputs shared by the request cells below.
rcode = 'WI'                                                   # sent as 'rcode' / 'region'
xlocation, ylocation = -87.49781885475109, 44.45880879748906   # point of interest
crs = 4326                                                     # sent as 'crs' with the point
stats_group = 2                                                # statistic group ID (2 = peak flow)
configs = 2                                                    # sent as the 'configs' option

#### Assign Location & Stats Group (2=Peakflow)

#### URL Strings

In [3]:
# Service endpoints used by the request cells below.
# NOTE: `waterhsed_url` (sic) keeps its spelling because the watershed request refers to it by that name.
waterhsed_url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson?"
PercentOverlay_url = "https://gis.streamstats.usgs.gov/arcgis/rest/services/nss/regions/MapServer/exts/PercentOverlayRESTSOE/PercentOverlay"
parameters_url = "https://streamstats.usgs.gov/streamstatsservices/parameters.json?"

# NSS service endpoints; two of them embed the configured statistic group ID.
stats_groups_url = f"https://streamstats.usgs.gov/nssservices/statisticgroups/{stats_group}.json?"
scenarios_url = "https://streamstats.usgs.gov/nssservices/scenarios.json?"
estimate_url = "https://streamstats.usgs.gov/nssservices/scenarios/estimate.json?"
Href_url = f"https://streamstats.usgs.gov/nssservices/statisticgroups/{stats_group}"

In [4]:
# Query the watershed service for the configured point, asking it to include
# both the parameters and the features in the response.
watershed_params = {
    'rcode': rcode,
    'xlocation': xlocation,
    'ylocation': ylocation,
    'crs': crs,
    'includeparameters': 'true',
    'includefeatures': 'true',
    'simplify': 'true',
}

r = requests.get(waterhsed_url, params=watershed_params)
# Every later cell depends on this response, so stop here on an HTTP error
# instead of failing later with a confusing decode or KeyError.
r.raise_for_status()
watershed_data = r.json()
watershed_data.keys()

dict_keys(['workspaceID', 'featurecollection', 'parameters', 'messages'])

In [5]:
# Peek at the keys of the first feature inside the second feature-collection entry.
first_feature = watershed_data['featurecollection'][1]['feature']['features'][0]
first_feature.keys()

dict_keys(['properties', 'type', 'bbox', 'geometry'])

In [6]:
# Keep the workspace identifier returned with the watershed; the parameters
# request sends it back to the service.
workspaceID = watershed_data["workspaceID"]
print(workspaceID)

WI20190321174213457000


### Featureclass From API

In [7]:
# The second entry of the feature collection supplies the feature that is
# sent to the percent-overlay service.
featurecollection = watershed_data["featurecollection"]
watershed_poly = featurecollection[1]["feature"]

#### Percent Overlay Test

In [8]:
# POST the watershed feature (serialised to a JSON string) as form data and
# decode the JSON reply.
PercentOverlay_params = {
    'geometry': json.dumps(watershed_poly),
    'f': 'json',
}
r = requests.post(PercentOverlay_url, data=PercentOverlay_params)
PercentOverlay = r.json()

In [26]:
# Display the decoded percent-overlay response; every later selection step iterates over it.
PercentOverlay

[]

### Quick Fix, requires testing/refinement

Select group codes by keyword. Revisit to verify: this method may provide inaccurate results.

In [10]:
# Keep the code of every overlay entry whose name contains '2006_Full_Region',
# then join the codes into the comma-separated form used in the requests.
regressionregion_codes = [
    overlay['code']
    for overlay in PercentOverlay
    if '2006_Full_Region' in overlay['name']
]
reg_codes = ','.join(regressionregion_codes)
reg_codes

''

In [11]:
# Map each selected regression-region code to the 'percent' value reported by the overlay.
rr_weight = {
    overlay['code']: overlay['percent']
    for overlay in PercentOverlay
    if overlay['code'] in regressionregion_codes
}
rr_weight

{}

In [12]:
# Fetch the statistic-group record for the configured group.
# The filters are sent as query-string parameters (`params=`). Passing them
# with `json=` on a GET puts them in a request body instead of the URL.
stats_groups_url_params = {'region': rcode, 'regressionregions': reg_codes}
r = requests.get(stats_groups_url, params=stats_groups_url_params)
r.raise_for_status()  # surface HTTP failures here rather than as a decode error
stats_groups = r.json()
stats_groups

{'ID': 2, 'Name': 'Peak-Flow Statistics', 'Code': 'PFS'}

In [13]:
# Fetch the scenarios for the region, statistic group, and regression regions.
# The filters are sent as query-string parameters (`params=`). Passing them
# with `data=` on a GET puts them in a request body instead of the URL.
# `configs` comes from the configuration cell instead of a repeated literal.
scenarios_url_params = {
    'region': rcode,
    'statisticgroups': stats_group,
    'regressionregions': reg_codes,
    'configs': configs,
}
r = requests.get(scenarios_url, params=scenarios_url_params)
r.raise_for_status()  # surface HTTP failures here rather than as a decode error
scenarios = r.json()

In [14]:
# Collect the parameter codes used by the selected regression regions.
# Membership is tested against the individual selected codes. Testing against
# the comma-joined `reg_codes` string would be a substring match, so a code
# such as 'gc1' would wrongly match inside 'gc12,gc13'.
selected_region_codes = set(regressionregion_codes)
needed_parameter_codes = {
    param['Code']
    for region in scenarios[0]['RegressionRegions']
    if region['Code'].lower() in selected_region_codes
    for param in region['Parameters']
    if 'Code' in param
}

# Sort before joining so the string is the same on every run (set order is arbitrary).
rr_parameter_codes = ','.join(sorted(needed_parameter_codes))
rr_parameter_codes

''

In [15]:
# Echo the parameters endpoint that the next cell queries.
parameters_url

'https://streamstats.usgs.gov/streamstatsservices/parameters.json?'

In [16]:
# Ask the parameters service for the values computed in this workspace.
# The dict is passed directly as query parameters; serialising it to JSON and
# parsing it back added nothing.
# NOTE(review): in the recorded run `rr_parameter_codes` was '' and the service
# replied with a long list of parameters, presumably all available ones.
# Confirm that this fallback is intended.
parameters_params = {
    'rcode': rcode,
    'workspaceID': workspaceID,
    'includeparameters': rr_parameter_codes,
}
r = requests.get(parameters_url, params=parameters_params)
r.raise_for_status()  # error check: stop on an HTTP failure before decoding
pdata = r.json()
pdata

{'parameters': [{'ID': 0,
   'name': 'CLIFAC100Y',
   'description': '100-year climate factor from Litchy and Karlinger (1990)',
   'code': 'CLIFAC100Y',
   'unit': 'dimensionless',
   'value': 2.7},
  {'ID': 0,
   'name': 'CLIFAC25Y',
   'description': '25-year climate factor from Litchy and Karlinger (1990)',
   'code': 'CLIFAC25Y',
   'unit': 'dimensionless',
   'value': 2.5},
  {'ID': 0,
   'name': 'Tennessee Climate Factor 2 Year',
   'description': 'Two-year climate factor from Lichy and Karlinger (1990)',
   'code': 'CLIMFAC2YR',
   'unit': 'dimensionless',
   'value': 1.7},
  {'ID': 0,
   'name': 'Stream Slope 10 and 85 Method',
   'description': 'Change in elevation divided by length between points 10 and 85 percent of distance along main channel to basin divide - main channel method not known',
   'code': 'CSL10_85',
   'unit': 'feet per mi',
   'value': 8.03},
  {'ID': 0,
   'name': 'Percent developed from NLCD2001',
   'description': 'Percentage of land-use categories 21-24

*__Include error check for above cell output__*

In [17]:
use_codes={}
for p in pdata['parameters']:
    print(p['code'], p['value'])
    use_codes[p['code']] = p['value']

CLIFAC100Y 2.7
CLIFAC25Y 2.5
CLIMFAC2YR 1.7
CSL10_85 8.03
DEVNLCD01 6.49
DRNAREA 143.0
FOREST 8.2
I24H100Y 5.734
I24H10Y 3.4
I24H25Y 4.013
I24H2Y 2.224
I24H50Y 4.859
I24H5Y 2.822
LC01CRPHAY 77.6
LC01HERB 0.45
LC01WATER 0.2
LC11EMWET 0.71
LC11WDWET 5.54
PRECIP 30.26
SNOFALL 41.203
SOILPERM 12.326
WETLAND 6.27


In [18]:
# Fetch the estimate template for the region, statistic group, and regression regions.
# The filters are sent as query-string parameters (`params=`). Passing them
# with `data=` on a GET puts them in a request body instead of the URL.
# `stats_group` and `configs` come from the configuration cell instead of repeated literals.
estimate_params = {
    'region': rcode,
    'statisticgroups': stats_group,
    'regressionregions': reg_codes,
    'configs': configs,
}
r = requests.get(estimate_url, params=estimate_params)
r.raise_for_status()  # surface HTTP failures here rather than as a decode error
estimate = r.json()

In [27]:
# Display the decoded estimate response (parameter values are still placeholders at this point).
estimate

[{'StatisticGroupID': 2,
  'StatisticGroupName': 'Peak-Flow Statistics',
  'RegressionRegions': [{'ID': 719,
    'Name': 'Area_1',
    'Code': 'GC1461',
    'Parameters': [{'ID': 16495,
      'Name': 'Drainage Area',
      'Description': 'Area that drains to a point on a stream',
      'Code': 'DRNAREA',
      'UnitType': {'ID': 0, 'Unit': 'square miles', 'Abbr': 'mi^2'},
      'Value': -999.99,
      'Limits': {'Max': 2120.0, 'Min': 0.28}},
     {'ID': 16496,
      'Name': '24 Hour 25 Year Precipitation',
      'Description': 'Maximum 24-hour precipitation that occurs on average once in 25 years',
      'Code': 'I24H25Y',
      'UnitType': {'ID': 0, 'Unit': 'inches', 'Abbr': 'in'},
      'Value': -999.99,
      'Limits': {'Max': 5.29, 'Min': 5.18}},
     {'ID': 16497,
      'Name': 'Percent Forest',
      'Description': 'Percentage of area covered by forest',
      'Code': 'FOREST',
      'UnitType': {'ID': 0, 'Unit': 'percent', 'Abbr': '%'},
      'Value': -999.99,
      'Limits': {'

In [20]:
# Fill the estimate template with the parameter values obtained for this watershed.
# Start from a deep copy of `estimate` rather than re-decoding `r`: `r` is
# reassigned by every request cell, so it only holds the estimate response if
# the cells ran in exactly the right order. The copy also leaves `estimate` untouched.
est = copy.deepcopy(estimate)
for regregion in est[0]['RegressionRegions']:
    for p in regregion['Parameters']:
        code = p['Code']
        if code in use_codes:
            p['Value'] = use_codes[code]

In [21]:
# Start the request payload with a self link to the statistic-group resource.
payload = {
    "Links": [
        {"rel": "self", "Href": Href_url, "method": "GET"},
    ],
}

In [22]:
# Copy every statistic-group field into the payload, then record the group ID.
payload.update(stats_groups)
payload['StatisticGroupID'] = stats_group

In [23]:
# Attach the selected regression regions, each tagged with its overlay weight.
# Regions are matched against the keys of `rr_weight` (one per selected code).
# Testing against the comma-joined `reg_codes` string would be a substring
# match, and could select a region that has no weight and raise KeyError.
# Each region is copied before 'PercentWeight' is added, so the entries of
# `est` are not modified and the cell can be re-run safely.
rr_list = []
for rr in est[0]['RegressionRegions']:
    reg_code = rr['Code'].lower()
    if reg_code in rr_weight:
        rr_list.append({**rr, 'PercentWeight': rr_weight[reg_code]})

payload['RegressionRegions'] = rr_list

In [24]:
# POST the assembled payload (wrapped in a one-element list) to the estimate
# endpoint and show the decoded reply.
peak_flow_url = f"https://streamstats.usgs.gov/nssservices/scenarios/estimate.json?region={rcode}&statisticgroups={stats_group}&regressionregions={reg_codes}&configs={configs}"
request_body = [payload]
r = requests.post(peak_flow_url, json=request_body)
r.json()

[{'StatisticGroupID': 2,
  'RegressionRegions': [],
  'Links': [{'rel': 'citations',
    'Href': 'https://streamstats.usgs.gov/nssservices/citations?regressionregions=',
    'method': 'GET'}]}]

# END