## Example script to convert USGS streamflow data into a calibration file

In [1]:
%matplotlib inline
import os
from matplotlib import pylab
import sys
from os.path import dirname
parent_dir = dirname(dirname(os.getcwd()))
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
import pickle
import geopandas as gp
from datetime import date
import pandas as pd
import numpy as np
import urllib2
import time
import sys

In [2]:
def getFlow(site):
    '''
        Download the daily discharge record for a USGS gage and parse it.

        The raw tab-delimited (RDB) response is saved to
        flow_data/<site>.txt (the directory is created if missing) and the
        data frame is parsed from that file.

        Input: USGS gage number (converted with str())

        Output: pandas data frame of volumetric streamflow for the window
        requested in the URL (1950-01-01 to 2017-09-01), indexed by datetime.
        The discharge is in column 'q' and the date in column 'date'; rows
        containing any missing value are dropped.

        Raises: ValueError if the response has no column ending in
        '00060_00003' (the discharge column this script looks for).
    '''
    site = str(site)
    url = 'https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&site_no=' + site + '&referred_module=sw&period=&begin_date=1950-01-01&end_date=2017-09-01'
    path = 'flow_data/' + site + '.txt'

    # urllib2 responses are not context managers, so close explicitly.
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    # Keep a copy of the raw download on disk; make sure the folder exists.
    if not os.path.isdir('flow_data'):
        os.makedirs('flow_data')
    with open(path, 'w') as f:
        f.write(content)

    # The RDB file starts with a run of '#' comment lines; the first line
    # after them is the column header, so its row number equals the count.
    count = 0
    with open(path, 'r') as f:
        for line in f:
            if not line.startswith('#'):
                break
            count += 1

    df = pd.read_csv(path, header=count, delimiter='\t')

    # The discharge column name carries a site-specific prefix, so match on
    # the suffix and fail loudly if it is absent.
    q_col = next((col for col in df.columns if col.endswith('00060_00003')), None)
    if q_col is None:
        raise ValueError('no discharge column ending in 00060_00003 found for site ' + site)

    df = df.rename(columns={q_col: 'q', 'datetime': 'date'})
    df = df.iloc[1:, :]  # first row after the header is the RDB field-format row, not data
    df = df.dropna()
    df['date'] = pd.to_datetime(df['date'])
    df['q'] = pd.to_numeric(df['q'])
    df.index = pd.to_datetime(df['date'])
    return df

def _finditem(obj, key):
    if key in obj: return obj[key]
    for k, v in obj.items():
        if isinstance(v,dict):
            return _finditem(v, key)

In [3]:
# Convert each gage's discharge record into runoff depth (cm/day) and pickle
# it for calibration.

# ft^3/s -> cm^3/day: 28316.8466 cm^3 per ft^3 * 86400 s per day.
CFS_TO_CM3_PER_DAY = 2.44657555e9
# mi^2 -> cm^2: one mile is exactly 160934.4 cm. This replaces the rounded
# factor 1/0.000000000039, which was about 1% too small.
SQ_MILE_TO_CM2 = 160934.4 ** 2

site_data = gp.read_file('./USGS_gages/USGS_Streamgages-NHD_Locations.shp')
sitelist = ['11475560']

for gage in sitelist:
    df = getFlow(gage)
    rng = df.index
    df = pd.DataFrame({'runoff': df.q.values}, index=rng)
    df = df * CFS_TO_CM3_PER_DAY  # convert to cm^3/day

    # Drainage area comes from the gage shapefile; require exactly one match
    # so a missing or duplicated gage fails with a clear message.
    matches = site_data.loc[site_data.SITE_NO == gage, 'DA_SQ_MILE']
    if len(matches) != 1:
        raise ValueError('expected exactly one drainage area for gage ' + gage
                         + ', found ' + str(len(matches)))
    area = float(matches.iloc[0]) * SQ_MILE_TO_CM2  # in cm^2

    df = df / area  # cm^3/day over cm^2 gives cm/day
    with open('../../calibration_data/' + gage + '_runoff.p', 'wb') as out:
        pickle.dump(df, out)
    