In [1]:
#libraries
import pandas as pd
from datetime import datetime, timedelta
import cwms
from dataretrieval import nwis
import requests
from json import loads

In [2]:
# Root URL of the CWMS Data API used by this session (the host name suggests a test instance).
apiRoot = "https://cwms-data-test.cwbi.us/cwms-data/"
# Alternative API root kept for reference; swap the comment markers to target it instead.
#apiRoot = "https://wm.mvp.ds.usace.army.mil:8243/mvp-data/"
from getpass import getpass
# Prompt for the key interactively so it is never stored in the notebook;
# the literal "apikey " prefix is prepended before the value is handed to cwms.
apiKey = "apikey " + getpass()
# Initialise the cwms session with the chosen root and key.
api = cwms.api.init_session(api_root=apiRoot, api_key=apiKey)

 ········


In [3]:
def get_rating_ids_from_specs(office_id):
    """Return the USGS-tagged rating specifications of an office that are active and auto-updating.

    The rating specifications are fetched with ``cwms.get_rating_specs``. Rows
    without a description are dropped, and a ``rating-type`` column is filled
    with ``EXSA``, ``CORR`` or ``BASE`` when the description contains the text
    ``USGS-<type>``. If a description contains more than one tag, the type that
    comes last in the (EXSA, CORR, BASE) order is the one that is kept.

    Parameters
    ----------
    office_id : str
        Office identifier forwarded to ``cwms.get_rating_specs``.

    Returns
    -------
    pandas.DataFrame
        The specifications that have a recognised ``rating-type`` and whose
        ``active`` and ``auto-update`` columns are both True.
    """
    usgs_types = ['EXSA', 'CORR', 'BASE']
    specs = cwms.get_rating_specs(office_id=office_id).df

    # Descriptions are the only place the USGS tag is looked for, so rows without one cannot match.
    specs = specs.dropna(subset=['description'])

    for usgs_type in usgs_types:
        has_tag = specs['description'].str.contains(f'USGS-{usgs_type}')
        specs.loc[has_tag, 'rating-type'] = usgs_type

    is_usgs_rating = specs['rating-type'].isin(usgs_types)
    is_active = specs['active'] == True
    is_auto_updated = specs['auto-update'] == True
    return specs[is_usgs_rating & is_active & is_auto_updated]

In [4]:
def get_rating_ids_from_template(template_id_mask:str, office_id:str = None):
    """Build a table of rating ids from the rating templates that match a mask.

    The templates are fetched with ``cwms.get_rating_templates``. Every rating
    id listed in a template's ``rating-ids`` entry becomes one output row, and
    the id is split on ``.`` into its four parts.

    Parameters
    ----------
    template_id_mask : str
        Template id mask forwarded to ``cwms.get_rating_templates``.
    office_id : str, optional
        Office identifier forwarded to ``cwms.get_rating_templates``.

    Returns
    -------
    pandas.DataFrame
        Columns ``rating-id``, ``location-id``, ``template-params``,
        ``template-version``, ``spec-version``, ``office-id`` and
        ``template-id``, with a fresh 0..n-1 index. An empty DataFrame (no
        columns) is returned when no template lists any rating id.

    Raises
    ------
    ValueError
        If a rating id does not split into exactly four dot-separated parts.
    """
    templates = cwms.get_rating_templates(template_id_mask=template_id_mask,office_id=office_id).df
    id_parts = ["location-id","template-params","template-version","spec-version"]

    # Collect the per-template frames and concatenate once at the end; growing a
    # DataFrame inside the loop re-copies every previous row on each iteration.
    frames = []
    for _, template in templates.iterrows():
        ids = template['rating-ids']
        # A template without ratings may carry an empty list or a missing value
        # (NaN/None) here; neither can be turned into rows, so skip both.
        if not pd.api.types.is_list_like(ids) or len(ids) == 0:
            continue
        rating_ids = pd.DataFrame({"rating-id": list(ids)})
        exp_ids = rating_ids['rating-id'].str.split('.',expand=True)
        exp_ids.columns = id_parts
        rating_spec = pd.concat([rating_ids,exp_ids],axis=1)
        rating_spec = rating_spec.assign(**{"office-id":template["office-id"],"template-id":template["id"]})
        frames.append(rating_spec)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


In [5]:
# Alternative ways to build the rating list from templates, kept for reference:
#rating_specs = get_rating_ids_from_template("Stage;Flow.EXSA","LRL")
#rating_specs = get_rating_ids_from_template(template_id_mask="*.USGS-*")
# Active selection: rating specifications of office MVP.
rating_specs = get_rating_ids_from_specs('MVP')

In [6]:
def get_location_aliases(df, loc_group_id, category_id, office_id):
    """Attach location-group aliases to a table of ratings.

    The members of the location group are fetched with
    ``cwms.get_location_group``. Members without an ``alias-id`` are discarded,
    the alias is exposed as ``USGS_St_Num`` (left-padded with ``0`` to at least
    eight characters) and ``attribute`` is exposed as ``Loc_attribute``.

    Parameters
    ----------
    df : pandas.DataFrame
        Ratings table; must contain ``location-id`` and ``office-id``.
    loc_group_id, category_id, office_id
        Forwarded unchanged to ``cwms.get_location_group``.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``df`` with the aliased group members on
        ``location-id`` and ``office-id``.
    """
    group_members = cwms.get_location_group(
        loc_group_id=loc_group_id,
        category_id=category_id,
        office_id=office_id,
    ).df

    has_alias = group_members['alias-id'].notnull()
    aliases = group_members[has_alias].rename(
        columns={'alias-id': 'USGS_St_Num', 'attribute': 'Loc_attribute'}
    )
    # Pad on the left with zeros so short station numbers reach eight characters.
    aliases['USGS_St_Num'] = aliases['USGS_St_Num'].str.rjust(8, '0')

    return pd.merge(df, aliases, how='inner', on=['location-id', 'office-id'])

In [7]:
# Join the selected ratings with the aliases of the "USGS Station Number" location group
# ("Agency Aliases" category, looked up under office "CWMS").
USGS_ratings = get_location_aliases(rating_specs,"USGS Station Number","Agency Aliases","CWMS")

In [8]:
USGS_ratings

Unnamed: 0,office-id,rating-id,template-id,location-id,version,in-range-method,out-range-low-method,out-range-high-method,active,auto-update,auto-activate,auto-migrate-extension,independent-rounding-specs,dependent-rounding-spec,effective-dates,source-agency,description,rating-type,USGS_St_Num,Loc_attribute
0,MVP,ABRN8.Stage;Flow.USGS-BASE.USGS-NWIS,Stage;Flow.USGS-BASE,ABRN8,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '2223456782'}],2222233332,"[2012-09-12T05:00:00Z, 2012-09-12T05:00:00Z, 2...",,"WILD RICE RIVER NR ABERCROMBIE, ND USGS-NWIS S...",BASE,05053000,0.0
1,MVP,ABRN8.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,ABRN8,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '2223456782'}],2222233332,"[2014-11-04T21:15:00Z, 2014-11-04T21:15:00Z, 2...",,"WILD RICE RIVER NR ABERCROMBIE, ND Expanded, S...",EXSA,05053000,0.0
2,MVP,ABRN8.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,ABRN8,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '4444444444'}],4444444444,"[2014-11-04T21:10:00Z, 2014-11-04T21:10:00Z, 2...",,"WILD RICE RIVER NR ABERCROMBIE, ND Stage Corre...",CORR,05053000,0.0
3,MVP,AGYM5.Stage;Flow.USGS-BASE.USGS-NWIS,Stage;Flow.USGS-BASE,AGYM5,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '2223456782'}],2222233332,"[2012-10-13T05:00:00Z, 2012-10-13T05:00:00Z, 2...",,"MIDDLE RIVER AT ARGYLE, MN USGS-NWIS Stream Ra...",BASE,05087500,0.0
4,MVP,AGYM5.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,AGYM5,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '2223456782'}],2222233332,"[2014-10-17T15:48:00Z, 2014-10-17T15:48:00Z, 2...",,"MIDDLE RIVER AT ARGYLE, MN Expanded, Shift-Adj...",EXSA,05087500,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,MVP,Westhope.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,Westhope,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '4444444444'}],4444444444,"[2017-01-18T19:30:00Z, 2017-01-18T19:30:00Z, 2...",,"SOURIS RIVER NR WESTHOPE, ND Stage Correction ...",CORR,05124000,0.0
334,MVP,WillowCity.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,WillowCity,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '2223456782'}],2222233332,"[2010-03-01T06:00:00Z, 2010-03-01T06:00:00Z, 2...",,"WILLOW CREEK NR WILLOW CITY, ND Expanded, Shif...",EXSA,05123400,0.0
335,MVP,WillowCity.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,WillowCity,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '4444444444'}],4444444444,"[2015-03-18T18:15:00Z, 2015-03-18T18:15:00Z, 2...",,"WILLOW CREEK NR WILLOW CITY, ND Stage Correcti...",CORR,05123400,0.0
336,MVP,ZUMM5.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,ZUMM5,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '2223456782'}],2222233332,"[2013-01-01T21:58:00Z, 2013-01-01T21:58:00Z, 2...",,"ZUMBRO RIVER AT ZUMBRO FALLS, MN Expanded, Shi...",EXSA,05374000,0.0


In [9]:
# Ratings with no effective dates recorded; set aside so they can be appended again later.
USGS_ratings_empty = USGS_ratings[USGS_ratings['effective-dates'].isna()]

In [10]:
# Keep only the ratings that have effective dates.
# NOTE: this overwrites USGS_ratings, so the cell that builds USGS_ratings_empty must run before it.
USGS_ratings = USGS_ratings[USGS_ratings['effective-dates'].notna()]

In [11]:
def get_usgs_updated_ratings(period, timeout=60):
    '''
    List the ratings the USGS reports as updated, using the NWIS
    ``get_ratings`` web service (the same service dataretrieval-python uses).

    Parameters
    ----------
    period
        Value sent as the ``period`` query parameter (presumably a look-back
        window in hours - confirm against the USGS service documentation).
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per line of the response that starts with ``USGS``, with the
        columns ``org``, ``USGS_St_Num``, ``rating-type`` (upper-cased),
        ``date_updated`` and ``url``. The frame is empty, but still has these
        columns, when the response lists no ratings.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    '''
    # Get USGS data
    base_url = "https://nwis.waterdata.usgs.gov/nwisweb/get_ratings"
    columns = ["org","USGS_St_Num","rating-type","date_updated","url"]

    query_dict = {
        "period":period,
        "format": "rdb"
    }

    # Without a timeout a stalled connection would block the notebook indefinitely.
    r = requests.get(base_url, params = query_dict, timeout = timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()

    temp = pd.DataFrame(r.text.split('\n'))
    # Data rows start with the agency code; everything else is header or comment text.
    temp = temp[temp[0].str.startswith("USGS")]
    if temp.empty:
        # Splitting an empty selection yields no columns at all, so the column
        # names could not be assigned; return a well-formed empty table instead.
        return pd.DataFrame(columns=columns)

    # Drop a trailing carriage return so it does not end up in the last field.
    updated_ratings = temp[0].str.rstrip("\r").str.split("\t",expand=True)
    updated_ratings.columns = columns
    updated_ratings["rating-type"] = updated_ratings["rating-type"].str.upper()
    return updated_ratings
    

In [12]:
# Ask the USGS service which ratings were updated; 48 is sent as its `period` parameter.
df = get_usgs_updated_ratings(48)

In [13]:
# Keep only the CWMS ratings whose station number and rating type appear in the USGS update list.
updated_ratings = pd.merge(USGS_ratings, df, how='inner', left_on=['USGS_St_Num','rating-type'],right_on=['USGS_St_Num','rating-type'])

In [14]:
# Parse every effective-date entry into a timestamp ...
updated_ratings.loc[:,'effective-dates'] = updated_ratings['effective-dates'].apply(lambda x: [pd.to_datetime(d) for d in x])
# ... and record the most recent one per rating for comparison with the USGS effective date.
updated_ratings.loc[:,'cwms_max_effective_date'] = updated_ratings['effective-dates'].apply(max)

In [15]:
# Ratings without any effective date are appended as well, so they are processed
# regardless of the USGS update list; columns they lack (e.g. cwms_max_effective_date)
# are filled with missing values by the concatenation.
if not USGS_ratings_empty.empty:
    updated_ratings = pd.concat([updated_ratings, USGS_ratings_empty],ignore_index=True)


In [16]:
updated_ratings

Unnamed: 0,office-id,rating-id,template-id,location-id,version,in-range-method,out-range-low-method,out-range-high-method,active,auto-update,...,effective-dates,source-agency,description,rating-type,USGS_St_Num,Loc_attribute,org,date_updated,url,cwms_max_effective_date
0,MVP,AGYM5.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,AGYM5,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-10-17 15:48:00+00:00, 2014-10-17 15:48:0...",,"MIDDLE RIVER AT ARGYLE, MN Expanded, Shift-Adj...",EXSA,5087500,0.0,USGS,2024-10-30 16:52:41,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-10-31 02:50:00+00:00
1,MVP,AGYM5.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,AGYM5,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-10-17 15:48:00+00:00, 2014-10-17 15:48:0...",,"MIDDLE RIVER AT ARGYLE, MN Stage Correction US...",CORR,5087500,0.0,USGS,2024-10-30 16:52:41,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-10-31 02:50:00+00:00
2,MVP,BRMN8.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,BRMN8,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-04-21 21:52:00+00:00, 2014-04-21 21:52:0...",,SHEYENNE R BL DEVILS LK STATE OUTLET NR BREMEN...,EXSA,5055400,0.0,USGS,2024-10-31 15:36:37,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-11-01 01:35:00+00:00
3,MVP,BRMN8.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,BRMN8,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-10-07 16:30:00+00:00, 2014-10-07 16:30:0...",,SHEYENNE R BL DEVILS LK STATE OUTLET NR BREMEN...,CORR,5055400,0.0,USGS,2024-10-31 15:36:37,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-11-01 01:35:00+00:00
4,MVP,DBCN8.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,DBCN8,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-10-30 14:30:00+00:00, 2014-10-30 14:30:0...",,"BALDHILL CREEK NR DAZEY, ND Stage Correction U...",CORR,5057200,0.0,USGS,2024-10-31 02:56:46,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-10-08 22:00:00+00:00
5,MVP,DDGW3.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,DDGW3,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2015-09-03 19:59:00+00:00, 2015-09-03 19:59:0...",,"TREMPEALEAU RIVER AT DODGE, WI Expanded, Shift...",EXSA,5379500,0.0,USGS,2024-10-31 13:01:07,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-10-31 23:00:00+00:00
6,MVP,DDGW3.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,DDGW3,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2015-10-08 14:50:00+00:00, 2015-10-08 14:50:0...",,"TREMPEALEAU RIVER AT DODGE, WI Stage Correctio...",CORR,5379500,0.0,USGS,2024-10-31 13:01:07,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-10-31 23:00:00+00:00
7,MVP,FSHM5.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,FSHM5,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-08-11 19:05:00+00:00, 2014-08-11 19:05:0...",,"RED LAKE RIVER AT FISHER, MN Expanded, Shift-A...",EXSA,5080000,0.0,USGS,2024-10-31 17:16:18,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-11-01 03:15:00+00:00
8,MVP,FSHM5.Stage;Stage.USGS-CORR.USGS-NWIS,Stage;Stage.USGS-CORR,FSHM5,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-08-11 19:05:00+00:00, 2014-08-11 19:05:0...",,"RED LAKE RIVER AT FISHER, MN Stage Correction ...",CORR,5080000,0.0,USGS,2024-10-31 17:16:18,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-09-25 21:00:00+00:00
9,MVP,HRCN8.Stage;Flow.USGS-EXSA.USGS-NWIS,Stage;Flow.USGS-EXSA,HRCN8,USGS-NWIS,LINEAR,NEAREST,NEAREST,True,True,...,"[2014-10-22 14:00:00+00:00, 2014-10-22 14:00:0...",,"SHEYENNE R AB SHEYENNE R DIVERSION NR HORACE, ...",EXSA,5059300,0.0,USGS,2024-10-31 17:41:23,https://waterdata.usgs.gov/nwisweb/data/rating...,2024-11-01 03:40:00+00:00


In [17]:
def convert_tz(tz:str):
    """Translate a North American time-zone abbreviation into a tz database name.

    Both the standard and the daylight abbreviation of a zone map to the same
    name. Any value that is not in the table is returned unchanged.

    Parameters
    ----------
    tz : str
        Abbreviation such as ``"CST"`` or ``"EDT"``.

    Returns
    -------
    str
        The mapped zone name, or ``tz`` itself when it is not recognised.
    """
    zone_by_abbreviation = {
        "AST": "America/Halifax",
        "ADT": "America/Halifax",
        "EST": "US/Eastern",
        "EDT": "US/Eastern",
        "CST": "US/Central",
        "CDT": "US/Central",
        "MST": "US/Mountain",
        "MDT": "US/Mountain",
        "PST": "US/Pacific",
        "PDT": "US/Pacific",
        "AKST": "America/Anchorage",
        "AKDT": "America/Anchorage",
        "UTC": "UTC",
        "GMT": "UTC",
    }
    return zone_by_abbreviation.get(tz, tz)


In [18]:
def get_usgs_tz(data):
    """Read the station time zone from the header lines of a USGS rating file.

    Parameters
    ----------
    data : pandas.DataFrame
        Rating file text with one line per row in column ``0``.

    Returns
    -------
    str
        The value that follows ``TIME_ZONE=`` on the first line starting with
        ``# //STATION AGENCY=``, with double quotes removed and passed
        through ``convert_tz``.

    Raises
    ------
    IndexError
        If there is no such line, or it has no ``TIME_ZONE=`` entry.
    """
    is_station_line = data[0].str.startswith("# //STATION AGENCY=")
    station_line = data[is_station_line].iloc[0,0]

    # Take the first whitespace-delimited token after the key and strip its quotes.
    after_key = station_line.split("TIME_ZONE=")[1]
    abbreviation = after_key.split()[0].replace('"', '')

    return convert_tz(abbreviation)

In [19]:
def get_begin_with_date(data,str_starts):
    """Return the last all-digit ``BEGIN=`` value among the matching header lines.

    Parameters
    ----------
    data : pandas.DataFrame
        Rating file text with one line per row in column ``0``.
    str_starts : str or tuple of str
        Prefix (or prefixes) a line must start with to be considered.

    Returns
    -------
    str or None
        The first whitespace-delimited token after ``BEGIN=`` (double quotes
        removed) of the last matching line where that token consists of
        digits only; ``None`` when no matching line has such a token.
    """
    matching_lines = data.loc[data[0].str.startswith(str_starts), 0]

    latest_begin = None
    for text in matching_lines:
        token = text.split("BEGIN=")[1].split()[0].strip().replace('"', '')
        # Placeholder values that are not purely numeric are skipped, so the
        # most recent numeric value seen so far is kept.
        if token.isdigit():
            latest_begin = token
    return latest_begin


In [20]:
def get_usgs_effective_date(data,rating_type):
    """Work out the effective date of a USGS rating from its file header.

    The date is taken from a header line that depends on the rating type:

    * ``EXSA`` - the first token of the ``# //RATING SHIFTED=`` value;
    * ``BASE`` - the last numeric ``BEGIN=`` of the ``# //RATING_DATETIME`` lines;
    * ``CORR`` - the last numeric ``BEGIN=`` of the ``# //CORRn_PREV`` lines.

    When no date is found that way (for any type, including a missing or empty
    ``RATING SHIFTED`` line), the ``# //RETRIEVED:`` timestamp is used instead.

    Parameters
    ----------
    data : pandas.DataFrame
        Rating file text with one line per row in column ``0``.
    rating_type : str
        ``'EXSA'``, ``'BASE'`` or ``'CORR'``; any other value goes straight to
        the ``RETRIEVED`` fallback.

    Returns
    -------
    pandas.Timestamp
        The date localised to the station time zone (``get_usgs_tz``) and
        rounded down to the minute.

    Raises
    ------
    IndexError
        If the fallback is needed but the file has no ``# //RETRIEVED:`` line.
    """
    date_string = None
    if rating_type == 'EXSA':
        shifted_lines = data[data[0].str.startswith("# //RATING SHIFTED=")]
        # Guard against a missing or empty value so this type falls back to the
        # retrieval time just like the other types, instead of raising.
        if not shifted_lines.empty:
            rating_shifted_date = shifted_lines.iloc[0,0].split('=')[1].replace('"', '')
            tokens = rating_shifted_date.split()
            if tokens:
                date_string = tokens[0]

    elif rating_type == 'BASE':
        date_string = get_begin_with_date(data, ("# //RATING_DATETIME BEGIN="))

    elif rating_type == 'CORR':
        date_string = get_begin_with_date(data, ("# //CORR1_PREV BEGIN=","# //CORR2_PREV BEGIN=","# //CORR3_PREV BEGIN="))

    if date_string is None:
        line = data[data[0].str.startswith("# //RETRIEVED:")].iloc[0,0]
        date_string = line.split('RETRIEVED: ')[1]

    timezone = get_usgs_tz(data)
    # 'min' is the canonical pandas alias for minute frequency.
    dt = pd.to_datetime(date_string).tz_localize(timezone).floor('min')
    return dt
    


In [21]:
def convert_usgs_rating_df(df, rating_type):
    """Reduce a USGS rating table to the two columns ``ind`` and ``dep``.

    ``INDEP`` becomes ``ind``; ``DEP`` or ``CORRINDEP`` becomes ``dep``.

    For ``rating_type == 'CORR'`` the table is first thinned: rows are grouped
    by their ``CORR`` value, the first and the last entry of every group are
    kept, and the result is ordered by ``INDEP``. A ``CORR`` value that occurs
    on a single row therefore contributes that row twice.

    Parameters
    ----------
    df : pandas.DataFrame
        Rating table with an ``INDEP`` column and either ``DEP`` or
        ``CORRINDEP`` (plus ``CORR`` for correction ratings).
    rating_type : str
        Rating type; only ``'CORR'`` triggers the thinning step.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``ind`` and ``dep``.
    """
    if rating_type == 'CORR':
        by_correction = df.groupby("CORR")
        endpoints = pd.concat(
            [by_correction.first(), by_correction.last()],
            ignore_index=True,
            join="inner",
        )
        df = endpoints.sort_values(by=['INDEP'], ignore_index=True)

    renamed = df.rename(columns={"INDEP": "ind", "CORRINDEP": "dep", "DEP": "dep"})
    return renamed[['ind', 'dep']].copy()

In [22]:
# Units ("independent;dependent") used when a rating is built from scratch, per USGS rating type.
rating_units={'EXSA':'ft;cfs','BASE':'ft;cfs','CORR':'ft;ft'}

# For every candidate rating: download the current USGS table, compare its effective
# date with the newest one recorded for the CWMS rating, and store it when they differ.
for _, row in updated_ratings.iterrows():
    print(f'Getting data for rating ID = {row["rating-id"]}')
    print(f'Getting data from USGS for USGS ID = {row["USGS_St_Num"]}, Rating Type = {row["rating-type"]}')
    usgs_rating, meta = nwis.get_ratings(site=row['USGS_St_Num'], file_type=str(row['rating-type']).lower())
    url = meta.url
    print(row['rating-id'])
    if usgs_rating.empty:
        print("Empty rating obtained from USGS Rating Curve not saved")
    else:
        # The parsed table does not carry the header comments, so the raw file is
        # fetched again to read the effective date and time zone from them.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        temp = pd.DataFrame(response.text.split('\n'))
        usgs_effective_date = get_usgs_effective_date(temp,row['rating-type'])
        cwms_effective_date = row['cwms_max_effective_date']
        # Rows appended from the ratings without effective dates have no date to compare with.
        has_cwms_rating = pd.notna(cwms_effective_date)
        # A difference of exactly one hour is also treated as "same date"
        # (presumably a standard/daylight-time offset - TODO confirm).
        if has_cwms_rating and ((cwms_effective_date == usgs_effective_date) or (cwms_effective_date == (usgs_effective_date + timedelta(hours=1)))):
            print("Effective dates are the same rating curve will not be saved")
        else:
            print(f"cwms = {cwms_effective_date}, usgs = {usgs_effective_date}")
            usgs_store_rating = convert_usgs_rating_df(usgs_rating,row['rating-type'])
            #find out how to get units
            # Reusing the stored rating as a template only works when one exists; without an
            # effective date there is nothing to fetch, so the rating is built from scratch instead.
            if row['auto-migrate-extension'] and has_cwms_rating:
                current_rating = cwms.get_ratings(
                                        rating_id=row['rating-id'],
                                        office_id=row['office-id'],
                                        begin=cwms_effective_date,
                                        end=cwms_effective_date,
                                        method="EAGER",
                                        single_rating_df=True)
                rating_json = current_rating.json
                points_json = loads(usgs_store_rating.to_json(orient="records"))
                rating_json["simple-rating"]["rating-points"] = {"point": points_json}
                rating_json["simple-rating"]["effective-date"] = usgs_effective_date.isoformat()
                # The creation date of the old rating must not be carried over; tolerate its absence.
                rating_json["simple-rating"].pop("create-date", None)
                rating_json["simple-rating"]["active"] = row['auto-activate']
            else:
                rating_json = cwms.rating_simple_df_to_json(data=usgs_store_rating,rating_id=row['rating-id'],office_id=row['office-id'],units=rating_units[row['rating-type']],effective_date=usgs_effective_date,active=row['auto-activate'])
            response = cwms.update_ratings(data = rating_json, rating_id = row['rating-id'])
            print(f'Stored rating for rating id = {row["rating-id"]}, effective date = {usgs_effective_date}')
        print(url)

Getting data for rating ID = AGYM5.Stage;Flow.USGS-EXSA.USGS-NWIS
Getting data from USGS for USGS ID = 05087500, Rating Type = EXSA
AGYM5.Stage;Flow.USGS-EXSA.USGS-NWIS
 effective date the same rating will not be updated
https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no=05087500&file_type=exsa
Getting data for rating ID = AGYM5.Stage;Stage.USGS-CORR.USGS-NWIS
Getting data from USGS for USGS ID = 05087500, Rating Type = CORR
AGYM5.Stage;Stage.USGS-CORR.USGS-NWIS
 effective date the same rating will not be updated
https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no=05087500&file_type=corr
Getting data for rating ID = BRMN8.Stage;Flow.USGS-EXSA.USGS-NWIS
Getting data from USGS for USGS ID = 05055400, Rating Type = EXSA
BRMN8.Stage;Flow.USGS-EXSA.USGS-NWIS
 effective date the same rating will not be updated
https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no=05055400&file_type=exsa
Getting data for rating ID = BRMN8.Stage;Stage.USGS-CORR.USGS-NWIS
Getting data 

In [23]:
# NOTE(review): same mapping as the rating_units assigned at the top of the update loop cell;
# this looks like a leftover scratch cell - consider keeping a single definition.
rating_units = {'EXSA':'ft;cfs','BASE':'ft;cfs','CORR':'ft;ft'}

In [24]:
rating_units['EXSA']

'ft;cfs'