In [1]:
%load_ext autoreload
%autoreload 2
# Standard imports
import os

import pandas as pd
import matplotlib.pyplot as plt
import urllib3
urllib3.disable_warnings()

# Local imports
from hat.api_call import *
from hat.utils.hydropower_potential import *
from hat.utils.PyGeoTools.geolocation import *

# Query the data (using the bounding box based on the coordinates of the dam)

In [None]:
# Load the input table; later cells read its 'lat', 'lon', 'dam_name' and 'NID ID' columns
input_df = pd.read_csv('combined_hydro_stream_gauge_full2.csv')
# Preview the first rows as the cell output
input_df.head()

In [14]:
# Bounding-box smoke test: take the coordinates stored under index label 0
# and build the box around them.
lat, lon = (input_df.loc[0, column] for column in ('lat', 'lon'))
distance = 0.5 # [km]
print(lat, lon)

# bounding_locations returns the south-west and north-east corners, in that order
loc = GeoLocation.from_degrees(lat, lon)
SW_loc, NE_loc = loc.bounding_locations(distance)

# The four edges, as needed for the API call using the bounding box
bbox_edges = (
    ("Northern-most lat: ", NE_loc.deg_lat),
    ("Southern-most lat: ", SW_loc.deg_lat),
    ("Western-most lon: ", SW_loc.deg_lon),
    ("Eastern-most lon: ", NE_loc.deg_lon),
)
for label, value in bbox_edges:
    print(label + str(value))

43.46821113 -71.66157073
Northern-most lat: 43.472702732476684
Southern-most lat: 43.46371952752332
Western-most lon: -71.66775958880788
Eastern-most lon: -71.65538187119209


In [15]:
# Bounding box query, ordered as [SW lon, SW lat, NE lon, NE lat].
# The maximum number of decimals is 6, so every corner is rounded with round()
# before being turned into a string, e.g. ['-105.877600','40.138700','-105.854100','40.156700'].
bbox_corners = (SW_loc.deg_lon, SW_loc.deg_lat, NE_loc.deg_lon, NE_loc.deg_lat)
query = {
    'id': [str(round(corner, 6)) for corner in bbox_corners],
    'start_date': '2020-01-01',
    'end_date': '2020-01-02',
}


In [16]:
# Pull the data
# `query` carries the bounding box under 'id' plus the start/end dates; the request URL
# printed by the call shows those four values sent as the `bBox` parameter.
# save_data=False: no `path` is given here, unlike the later call that uses save_data=True.
df_raw = get_data(query, id_type='bBox', save_data=False)

https://waterservices.usgs.gov/nwis/iv/?format=json&bBox=-71.66776,43.46372,-71.655382,43.472703&startDT=2020-01-01&endDT=2020-01-02&parameterCd=00060&siteStatus=all
Success with data retrieval from API


In [17]:
# Build a dataframe with the minimal information needed for the query. This can be saved as a csv for future reference
start_date = '2019-01-01'
end_date = '2019-12-31'
# len() counts every row. Series.count() skips NaN, so a single missing latitude
# would have shortened the loop and silently dropped the last rows of the table.
df_len = len(input_df)

# Iterate over the column values directly instead of input_df.loc[i, ...] so the result
# does not depend on input_df carrying a default 0..n-1 index.
rows = [
    [start_date, end_date, dam_lat, dam_lon, dam_name, nid_id]
    for dam_lat, dam_lon, dam_name, nid_id in zip(
        input_df['lat'], input_df['lon'], input_df['dam_name'], input_df['NID ID']
    )
]

# One row per dam, with the same date window applied to all of them
query_df = pd.DataFrame(rows, columns=["start", "end", "lat", "lon", "name", "NID ID"])

In [18]:
def build_query(series, distance=0.5):
    '''
    Builds the query dictionary based on a series taken from the query dataframe

    Parameters
    ----------
    series : mapping-like (e.g. one row of the query dataframe)
        Must provide the keys 'lat', 'lon', 'start' and 'end'.
    distance : float, optional
        Distance [km] handed to ``GeoLocation.bounding_locations`` to size the
        bounding box around the point. Defaults to 0.5, the value that was
        previously hard-coded.

    Returns
    -------
    dict
        'id'         : [SW lon, SW lat, NE lon, NE lat] as strings, each rounded to 6 decimals
        'start_date' : series['start']
        'end_date'   : series['end']
    '''
    # Get the bounding box corners (south-west first, north-east second)
    loc = GeoLocation.from_degrees(series['lat'], series['lon'])
    SW_loc, NE_loc = loc.bounding_locations(distance)

    # Order expected for the bounding box query: west, south, east, north
    corners = (SW_loc.deg_lon, SW_loc.deg_lat, NE_loc.deg_lon, NE_loc.deg_lat)

    # Build query dictionary; coordinates are limited to 6 decimals
    query = {
        'id': [str(round(corner, 6)) for corner in corners],
        'start_date': series['start'],
        'end_date': series['end'],
    }

    return query
    

In [19]:
# example for a single query
for idx, dam in query_df.iterrows():
    print(idx)
    query = build_query(dam)
    save_filename = os.path.join('data',f"{dam['NID ID']}_{dam['start']}_{dam['end']}.json")
    df_raw = get_data(query, id_type='bBox', save_data=True, path=save_filename)
    query_df.loc[idx,'filename'] = save_filename
    
    break

0
https://waterservices.usgs.gov/nwis/iv/?format=json&bBox=-71.66776,43.46372,-71.655382,43.472703&startDT=2019-01-01&endDT=2019-12-31&parameterCd=00060&siteStatus=all
Success with data retrieval from API


In [21]:
# Show what the last get_data call returned (the saved output of this cell is an empty list)
df_raw

[]