In [1]:
#libraries
from datetime import datetime, timedelta
from getpass import getpass
from json import loads

import cwms
import pandas as pd
import requests
from dataretrieval import nwis

In [2]:
# Root URL handed to cwms.api.init_session; every cwms call below goes to this server.
apiRoot = "https://water.dev.cwbi.us/cwms-data/"
#apiRoot = "https://wm.mvp.ds.usace.army.mil:8243/mvp-data/"
# Prompt for the key instead of hardcoding it; getpass keeps the typed value
# out of the notebook source and the rendered output.
apiKey = "apikey " + getpass("CWMS API key: ")
api = cwms.api.init_session(api_root=apiRoot, api_key=apiKey)

 ········


In [3]:
def get_rating_ids_from_specs(office_id):
    """Return the active, auto-updating USGS rating specs for an office.

    Rating specs are fetched with ``cwms.get_rating_specs`` and classified by
    their ``description`` text: a spec whose description contains
    ``USGS-EXSA``, ``USGS-CORR`` or ``USGS-BASE`` gets the matching value in a
    new ``rating-type`` column. If a description contains more than one of
    these markers, the one checked last (``BASE``) wins.

    Parameters
    ----------
    office_id : str
        Office identifier forwarded to ``cwms.get_rating_specs``.

    Returns
    -------
    pandas.DataFrame
        The specs that have a description, a recognised ``rating-type`` and
        both ``active`` and ``auto-update`` equal to True. Empty (but with the
        ``rating-type`` column present) when nothing qualifies.
    """
    rating_types = ['EXSA', 'CORR', 'BASE']
    rating_specs = cwms.get_rating_specs(office_id=office_id).df

    # Work on an explicit copy so the labelling below never writes into a
    # frame that shares data with the API result.
    rating_specs = rating_specs.dropna(subset=['description']).copy()

    # Create the column up front: relying on .loc to add it fails when the
    # frame has no rows, and the filter below needs the column to exist.
    rating_specs['rating-type'] = None
    if not rating_specs.empty:
        for rating_type in rating_types:
            # Plain substring match; the marker contains no pattern syntax.
            has_marker = rating_specs['description'].str.contains(f'USGS-{rating_type}', regex=False)
            rating_specs.loc[has_marker, 'rating-type'] = rating_type

    # `== True` is deliberate: the flag columns may be object dtype rather
    # than bool, and only a literal True should pass.
    keep = (
        rating_specs['rating-type'].isin(rating_types)
        & (rating_specs['active'] == True)
        & (rating_specs['auto-update'] == True)
    )
    return rating_specs[keep]

In [4]:
def get_rating_ids_from_template(template_id_mask:str, office_id:str = None):
    """Build a table of rating ids from the rating templates matching a mask.

    Templates are fetched with ``cwms.get_rating_templates``. Each entry in a
    template's ``rating-ids`` becomes one output row, with the id also split
    on ``.`` into its four parts.

    Parameters
    ----------
    template_id_mask : str
        Mask forwarded to ``cwms.get_rating_templates``.
    office_id : str, optional
        Office identifier forwarded to ``cwms.get_rating_templates``.

    Returns
    -------
    pandas.DataFrame
        Columns ``rating-id``, ``location-id``, ``template-params``,
        ``template-version``, ``spec-version``, ``office-id`` and
        ``template-id``. A completely empty DataFrame when no template
        carries any rating ids.

    Raises
    ------
    ValueError
        If a rating id does not split into exactly four dot-separated parts
        (the column assignment then has the wrong length).
    """
    templates = cwms.get_rating_templates(template_id_mask=template_id_mask,office_id=office_id).df

    # Collect one frame per template and concatenate once at the end; growing
    # a DataFrame inside the loop re-copies all earlier rows on every pass.
    spec_frames = []
    for _, template in templates.iterrows():
        rating_ids = pd.DataFrame(template['rating-ids'])
        if rating_ids.empty:
            continue
        rating_ids.columns = ["rating-id"]
        exp_ids = rating_ids['rating-id'].str.split('.',expand=True)
        exp_ids.columns = ["location-id","template-params","template-version","spec-version"]
        rating_spec = pd.concat([rating_ids,exp_ids],axis=1)
        # Keys contain '-', so they cannot be written as keyword arguments.
        rating_spec = rating_spec.assign(**{"office-id":template["office-id"],"template-id":template["id"]})
        spec_frames.append(rating_spec)

    if not spec_frames:
        return pd.DataFrame()
    return pd.concat(spec_frames, ignore_index=True)


In [6]:
# Choose which rating specs to process. The two template-based selections are
# kept commented out as alternatives; the active line selects by spec
# description for office 'LRL'.
#rating_specs = get_rating_ids_from_template("Stage;Flow.EXSA","LRL")
#rating_specs = get_rating_ids_from_template(template_id_mask="*.USGS-*")
rating_specs = get_rating_ids_from_specs('LRL')

In [7]:
def get_location_aliases(df, loc_group_id, category_id, office_id):
    """Attach location-group aliases to a table of rating specs.

    The location group is fetched with ``cwms.get_location_group``. Members
    without an ``alias-id`` are dropped, ``alias-id`` is renamed to
    ``USGS_St_Num`` and ``attribute`` to ``Loc_attribute``, and the station
    number is left-padded with ``'0'`` to at least 8 characters.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to enrich; must carry ``location-id`` and ``office-id``.
    loc_group_id, category_id, office_id
        Forwarded unchanged to ``cwms.get_location_group``.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``df`` with the alias table on ``location-id`` and
        ``office-id``, so rows of ``df`` without an alias are dropped.
    """
    group_members = cwms.get_location_group(
        loc_group_id=loc_group_id,
        category_id=category_id,
        office_id=office_id,
    ).df

    aliased = (
        group_members[group_members['alias-id'].notnull()]
        .rename(columns={'alias-id': 'USGS_St_Num', 'attribute': 'Loc_attribute'})
        .assign(USGS_St_Num=lambda members: members['USGS_St_Num'].str.rjust(8, '0'))
    )

    return df.merge(aliased, how='inner', on=['location-id', 'office-id'])

In [8]:
# Join the selected rating specs with the "USGS Station Number" location group
# (category "Agency Aliases", office "CWMS") to get a USGS_St_Num per spec.
USGS_ratings = get_location_aliases(rating_specs,"USGS Station Number","Agency Aliases","CWMS")

In [9]:
# Display the joined table for inspection.
USGS_ratings

Unnamed: 0,office-id,rating-id,template-id,location-id,version,source-agency,in-range-method,out-range-low-method,out-range-high-method,active,auto-update,auto-activate,auto-migrate-extension,independent-rounding-specs,dependent-rounding-spec,description,effective-dates,rating-type,USGS_St_Num,Loc_attribute
0,LRL,Abington.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Abington,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,False,[{'value': '4444444444'}],4444444444,EAST%20FORK%20WHITEWATER%20RIVER%20AT%20ABINGT...,[2025-04-08T17:55:00Z],EXSA,03275600,
1,LRL,Adamsboro.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Adamsboro,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,False,[{'value': '4444444444'}],4444444444,EEL%20RIVER%20NEAR%20LOGANSPORT%2C%20IN%20Expa...,[2025-03-05T00:20:00Z],EXSA,03328500,
2,LRL,Alvaton.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Alvaton,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,False,[{'value': '4444444444'}],4444444444,DRAKES%20CREEK%20NEAR%20ALVATON%2C%20KY%20Expa...,[2025-02-17T04:00:00Z],EXSA,03314000,
3,LRL,Apex.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Apex,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,False,[{'value': '4444444444'}],4444444444,POND%20RIVER%20NEAR%20APEX%2C%20KY%20Expanded%...,[2025-04-07T15:50:00Z],EXSA,03320500,
4,LRL,Bardstown.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Bardstown,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,False,[{'value': '4444444444'}],4444444444,BEECH%20FORK%20AT%20BARDSTOWN%2C%20KY%20Expand...,[2024-05-24T00:20:00Z],EXSA,03301000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,LRL,Zionsville.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Zionsville,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,False,[{'value': '4444444444'}],4444444444,EAGLE%20CREEK%20AT%20ZIONSVILLE%2C%20IN%20Expa...,[2025-04-24T00:30:00Z],EXSA,03353200,0.0
101,LRL,Spencer.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Spencer,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '2222233332'}],2222233332,USGS-EXSA,,EXSA,03357000,0.0
102,LRL,Jasper.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Jasper,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '4444444444'}],4444444444,PATOKA%20RIVER%20AT%20JASPER%2C%20IN%20Expande...,,EXSA,03375500,0.0
103,LRL,Alpine.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Alpine,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,True,True,True,[{'value': '4444444444'}],4444444444,WHITEWATER%20RIVER%20NEAR%20ALPINE%2C%20IN%20E...,,EXSA,03275000,


In [10]:
# Specs with no effective dates in CWMS; set aside here and appended back
# before the store loop.
USGS_ratings_empty = USGS_ratings[USGS_ratings['effective-dates'].isna()]

In [11]:
# Keep only specs that already have at least one effective date.
# NOTE(review): this rebinds USGS_ratings, so re-running the previous cell
# after this one yields an empty USGS_ratings_empty.
USGS_ratings = USGS_ratings[USGS_ratings['effective-dates'].notna()]

In [12]:
def get_usgs_updated_ratings(period, timeout=60):
    """List the USGS ratings reported as updated by the NWIS get_ratings service.

    The request pattern is based on dataretrieval-python.

    Parameters
    ----------
    period
        Value sent as the ``period`` query parameter.
        NOTE(review): presumably a look-back window in hours - confirm
        against the USGS service documentation.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per response line starting with ``USGS``, with columns
        ``org``, ``USGS_St_Num``, ``rating-type`` (upper-cased),
        ``date_updated`` and ``url``. Empty, with the same columns, when the
        response lists no ratings.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    ValueError
        If a listed row does not have exactly five tab-separated fields.
    """
    base_url = "https://nwis.waterdata.usgs.gov/nwisweb/get_ratings"
    columns = ["org","USGS_St_Num","rating-type","date_updated","url"]

    query_dict = {
        "period":period,
        "format": "rdb"
    }

    # A timeout keeps a stalled server from hanging the notebook, and the
    # status check stops an error page from being parsed as "no updates".
    r = requests.get(base_url, params = query_dict, timeout=timeout)
    r.raise_for_status()

    # splitlines() also drops a trailing '\r', which would otherwise end up
    # at the end of the url field when the response uses CRLF line endings.
    lines = pd.Series(r.text.splitlines(), dtype="object")
    usgs_lines = lines[lines.str.startswith("USGS")]
    if usgs_lines.empty:
        # Splitting zero rows yields zero columns, so naming the five
        # columns would fail; return the expected empty shape instead.
        return pd.DataFrame(columns=columns)

    updated_ratings = usgs_lines.str.split("\t",expand=True)
    updated_ratings.columns = columns
    updated_ratings["rating-type"] = updated_ratings["rating-type"].str.upper()
    return updated_ratings
    

In [13]:
# Ask USGS which ratings were updated within period=48.
# NOTE(review): 48 is presumably hours - confirm, and consider a named constant.
df = get_usgs_updated_ratings(48)

In [14]:
# Keep only the CWMS specs whose station number and rating type appear in the
# USGS list of recently updated ratings.
updated_ratings = USGS_ratings.merge(
    df,
    how='inner',
    left_on=['USGS_St_Num', 'rating-type'],
    right_on=['USGS_St_Num', 'rating-type'],
)

In [15]:
# Parse every entry of each row's effective-dates list with pd.to_datetime,
# then record the latest one per row as cwms_max_effective_date.
updated_ratings.loc[:,'effective-dates'] = updated_ratings['effective-dates'].apply(lambda x: [pd.to_datetime(d) for d in x])
updated_ratings.loc[:,'cwms_max_effective_date'] = updated_ratings['effective-dates'].apply(max)

In [16]:
# Add back the specs that have no effective dates so the store loop also
# processes them; the USGS-side columns are left unset for these rows.
# NOTE(review): re-running this cell appends the same rows again.
if not USGS_ratings_empty.empty:
    updated_ratings = pd.concat([updated_ratings, USGS_ratings_empty],ignore_index=True)


In [25]:
# Display the ratings that the store loop will process.
updated_ratings

Unnamed: 0,office-id,rating-id,template-id,location-id,version,source-agency,in-range-method,out-range-low-method,out-range-high-method,active,...,dependent-rounding-spec,description,effective-dates,rating-type,USGS_St_Num,Loc_attribute,org,date_updated,url,cwms_max_effective_date
0,LRL,Centerton.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Centerton,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,...,4444444444,WHITE%20RIVER%20NEAR%20CENTERTON%2C%20IN%20Exp...,[2025-04-23 17:40:00+00:00],EXSA,3354000,0.0,USGS,2025-04-23 09:49:30,https://waterdata.usgs.gov/nwisweb/data/rating...,2025-04-23 17:40:00+00:00
1,LRL,Zionsville.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Zionsville,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,...,4444444444,EAGLE%20CREEK%20AT%20ZIONSVILLE%2C%20IN%20Expa...,[2025-04-24 00:30:00+00:00],EXSA,3353200,0.0,USGS,2025-04-23 16:31:17,https://waterdata.usgs.gov/nwisweb/data/rating...,2025-04-24 00:30:00+00:00
2,LRL,Spencer.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Spencer,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,...,2222233332,USGS-EXSA,,EXSA,3357000,0.0,,,,NaT
3,LRL,Jasper.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Jasper,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,...,4444444444,PATOKA%20RIVER%20AT%20JASPER%2C%20IN%20Expande...,,EXSA,3375500,0.0,,,,NaT
4,LRL,Alpine.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,Alpine,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,...,4444444444,WHITEWATER%20RIVER%20NEAR%20ALPINE%2C%20IN%20E...,,EXSA,3275000,,,,,NaT
5,LRL,BowlingGreenIN.Stage;Flow.EXSA.USGS-NWIS,Stage;Flow.EXSA,BowlingGreenIN,USGS-NWIS,USGS,LINEAR,NEAREST,NEAREST,True,...,4444444444,EEL%20RIVER%20AT%20BOWLING%20GREEN%2C%20IN%20E...,,EXSA,3360000,,,,,NaT


In [18]:
def convert_tz(tz:str):
    """Translate a time-zone abbreviation into a time-zone identifier.

    Standard and daylight abbreviations of the same zone map to one
    identifier. Matching is exact and case-sensitive; any value that is not
    in the table is returned unchanged.
    """
    zone_by_abbreviation = {
        "AST": "America/Halifax",
        "ADT": "America/Halifax",
        "EST": "US/Eastern",
        "EDT": "US/Eastern",
        "CST": "US/Central",
        "CDT": "US/Central",
        "MST": "US/Mountain",
        "MDT": "US/Mountain",
        "PST": "US/Pacific",
        "PDT": "US/Pacific",
        "AKST": "America/Anchorage",
        "AKDT": "America/Anchorage",
        "UTC": "UTC",
        "GMT": "UTC",
    }
    return zone_by_abbreviation.get(tz, tz)


In [19]:
def get_usgs_tz(data):
    """Read the station time zone from the header lines of a USGS rating file.

    ``data`` is a DataFrame whose column ``0`` holds the file's text lines.
    The first line starting with ``# //STATION AGENCY=`` is used: the token
    following ``TIME_ZONE=`` is taken, stripped of double quotes, and passed
    through ``convert_tz``.

    Raises ``IndexError`` when there is no such line or it has no
    ``TIME_ZONE=`` entry.
    """
    is_station_line = data[0].str.startswith("# //STATION AGENCY=")
    station_line = data[is_station_line].iloc[0,0]
    after_key = station_line.split("TIME_ZONE=")[1]
    abbreviation = after_key.split()[0].replace('"', '')
    return convert_tz(abbreviation)

In [20]:
def get_begin_with_date(data,str_starts):
    """Return the last all-digit ``BEGIN=`` value among matching header lines.

    ``data`` is a DataFrame whose column ``0`` holds text lines and
    ``str_starts`` is the prefix (or tuple of prefixes) selecting the lines to
    inspect. For each selected line the token after ``BEGIN=`` is taken with
    double quotes removed; tokens that are not purely digits are ignored.

    Returns the token from the last qualifying line, or ``None`` when no line
    qualifies.
    """
    candidates = data.loc[data[0].str.startswith(str_starts), 0]
    latest = None
    for text in candidates:
        token = text.split("BEGIN=")[1].split()[0].strip().replace('"', '')
        if token.isdigit():
            latest = token
    return latest


In [21]:
def get_usgs_effective_date(data,rating_type):
    """Work out a rating's effective date from the header of its USGS file.

    ``data`` is a DataFrame whose column ``0`` holds the file's text lines.
    The date text is looked up by rating type:

    * ``EXSA`` - first whitespace-separated token of the value on the first
      ``# //RATING SHIFTED=`` line (double quotes removed).
    * ``BASE`` - ``get_begin_with_date`` over ``# //RATING_DATETIME BEGIN=``
      lines.
    * ``CORR`` - ``get_begin_with_date`` over the ``CORR1_PREV``,
      ``CORR2_PREV`` and ``CORR3_PREV`` ``BEGIN=`` lines.

    When that yields nothing (including an unrecognised rating type), the
    text after ``RETRIEVED: `` on the first ``# //RETRIEVED:`` line is used.

    Returns a pandas Timestamp localized to the zone from ``get_usgs_tz`` and
    floored to the minute. Raises ``IndexError`` when a required header line
    is missing.
    """
    def first_header_line(prefix):
        # Text of the first row whose column 0 starts with `prefix`.
        return data[data[0].str.startswith(prefix)].iloc[0,0]

    stamp = None
    if rating_type == 'BASE':
        stamp = get_begin_with_date(data, "# //RATING_DATETIME BEGIN=")
    elif rating_type == 'CORR':
        stamp = get_begin_with_date(data, ("# //CORR1_PREV BEGIN=","# //CORR2_PREV BEGIN=","# //CORR3_PREV BEGIN="))
    elif rating_type == 'EXSA':
        shifted_value = first_header_line("# //RATING SHIFTED=").split('=')[1]
        stamp = shifted_value.replace('"', '').split()[0]

    if stamp is None:
        stamp = first_header_line("# //RETRIEVED:").split('RETRIEVED: ')[1]

    zone = get_usgs_tz(data)
    return pd.to_datetime(stamp).tz_localize(zone).floor('Min')
    


In [22]:
def convert_usgs_rating_df(df, rating_type):
    """Reduce a USGS rating table to the ``ind``/``dep`` points to store.

    Parameters
    ----------
    df : pandas.DataFrame
        Rating table with an ``INDEP`` column and either ``DEP`` or
        ``CORRINDEP``; ``CORR`` tables also need a ``CORR`` column.
    rating_type : str
        ``'CORR'`` triggers the compression described below; any other value
        only renames and selects columns.

    Returns
    -------
    pandas.DataFrame
        A copy holding just ``ind`` (from ``INDEP``) and ``dep`` (from
        ``DEP`` or ``CORRINDEP``).

    Notes
    -----
    For ``CORR`` tables only the first and last row of each run of
    consecutive rows sharing a ``CORR`` value are kept, in ``INDEP`` order.
    Runs are detected by adjacency rather than by grouping on the value:
    grouping would merge separate stretches with the same correction (for
    example 0 at both ends of the table) and lose the points where the
    correction starts or stops changing. A run of one row contributes one
    point, not two identical ones.
    """
    if rating_type == 'CORR':
        # mergesort is stable, so rows with equal INDEP keep their file order.
        ordered = df.sort_values(by=['INDEP'], ignore_index=True, kind='mergesort')
        # True where the correction differs from the row before (run start).
        starts_run = ordered['CORR'].ne(ordered['CORR'].shift())
        # A row ends its run when the next row starts one; the last row always does.
        ends_run = starts_run.shift(-1, fill_value=True)
        df = ordered[starts_run | ends_run].reset_index(drop=True)
    df = df.rename(columns={"INDEP": "ind", "CORRINDEP": "dep", "DEP":"dep"})
    df_out = df[['ind', 'dep']].copy()
    return df_out

In [26]:
# Units string passed to cwms.rating_simple_df_to_json, keyed by rating type.
rating_units={'EXSA':'ft;cfs','BASE':'ft;cfs','CORR':'ft;ft'}
# For every candidate rating: download the USGS table, compare its effective
# date with the newest one recorded from CWMS, and store the rating in CWMS
# when they differ.
# NOTE(review): nothing in the loop catches exceptions, so one failing rating
# stops processing of all remaining ones.
for _, row in updated_ratings.iterrows():
    print(f'Getting data for rating ID = {row["rating-id"]}')
    print(f'Getting data from USGS for USGS ID = {row["USGS_St_Num"]}, Rating Type = {row["rating-type"]}')
    usgs_rating, meta = nwis.get_ratings(site=row['USGS_St_Num'], file_type=str(row['rating-type']).lower())
    url = meta.url
    print(row['rating-id'])
    if usgs_rating.empty:
        print("Empty rating obtained from USGS Rating Curve not saved")
    else: 
        # Fetch the same URL again as raw text: the header comment lines are
        # what get_usgs_effective_date parses.
        # NOTE(review): this request has no timeout and no status check.
        response = requests.get(url)
        temp = pd.DataFrame(response.text.split('\n'))
        usgs_effective_date = get_usgs_effective_date(temp,row['rating-type'])
        # Missing (NaT/NaN) for specs that had no effective dates; both
        # comparisons below are then False and the rating is stored.
        cwms_effective_date = row['cwms_max_effective_date']
        # NOTE(review): the extra one-hour match presumably absorbs a
        # daylight-saving offset difference - confirm.
        if (cwms_effective_date == usgs_effective_date) or (cwms_effective_date == (usgs_effective_date + timedelta(hours=1))):
            print("Effective dates are the same rating curve will not be saved")
        else:
            print(f"cwms = {cwms_effective_date}, usgs = {usgs_effective_date}")
            usgs_store_rating = convert_usgs_rating_df(usgs_rating,row['rating-type'])
            # TODO: find out how to get units
            if row['auto-migrate-extension'] and pd.notna(cwms_effective_date):
                # Start from the rating currently in CWMS at its latest
                # effective date and swap in the new points, so the other
                # fields of that rating carry over.
                current_rating = cwms.get_ratings(
                                        rating_id=row['rating-id'],
                                        office_id=row['office-id'],
                                        begin=cwms_effective_date,
                                        end=cwms_effective_date,
                                        method="EAGER",
                                        single_rating_df=True)
                rating_json = current_rating.json
                points_json = loads(usgs_store_rating.to_json(orient="records"))
                rating_json["simple-rating"]["rating-points"] = {"point": points_json}
                rating_json["simple-rating"]["effective-date"] = usgs_effective_date.isoformat()
                # NOTE(review): raises KeyError when "create-date" is absent.
                del rating_json["simple-rating"]["create-date"]
                rating_json["simple-rating"]["active"] = row['auto-activate']
            else:
                # No existing rating to migrate from: build the payload from
                # the USGS points alone, with units looked up by rating type.
                rating_json = cwms.rating_simple_df_to_json(data=usgs_store_rating,rating_id=row['rating-id'],office_id=row['office-id'],units=rating_units[row['rating-type']],effective_date=usgs_effective_date,active=row['auto-activate'])
            # Rebinds `response` (previously the HTTP response); the value is
            # not read afterwards.
            response = cwms.update_ratings(data = rating_json, rating_id = row['rating-id'])
            print(f'Stored rating for rating id = {row["rating-id"]}, effective date = {usgs_effective_date}')
        print(url)

Getting data for rating ID = Centerton.Stage;Flow.EXSA.USGS-NWIS
Getting data from USGS for USGS ID = 03354000, Rating Type = EXSA
Centerton.Stage;Flow.EXSA.USGS-NWIS
Effective dates are the same rating curve will not be saved
https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no=03354000&file_type=exsa
Getting data for rating ID = Zionsville.Stage;Flow.EXSA.USGS-NWIS
Getting data from USGS for USGS ID = 03353200, Rating Type = EXSA
Zionsville.Stage;Flow.EXSA.USGS-NWIS
Effective dates are the same rating curve will not be saved
https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no=03353200&file_type=exsa
Getting data for rating ID = Spencer.Stage;Flow.EXSA.USGS-NWIS
Getting data from USGS for USGS ID = 03357000, Rating Type = EXSA
Spencer.Stage;Flow.EXSA.USGS-NWIS
cwms = NaT, usgs = 2025-03-24 14:40:00-04:00
Stored rating for rating id = Spencer.Stage;Flow.EXSA.USGS-NWIS, effective date = 2025-03-24 14:40:00-04:00
https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_

In [23]:
# Units string per rating type.
# NOTE(review): the store loop cell assigns this same mapping itself, and this
# cell's execution count (23) precedes that cell's (26) although it appears
# after it - one of the two definitions is redundant.
rating_units = {'EXSA':'ft;cfs','BASE':'ft;cfs','CORR':'ft;ft'}

In [24]:
# Scratch check of the units lookup for the EXSA rating type.
rating_units['EXSA']

'ft;cfs'