In [1]:
import argparse
import copy
import glob
import os
import pickle
import random
import shutil
import sys
import time
import traceback
from concurrent.futures import ProcessPoolExecutor, as_completed, wait
from datetime import datetime, timezone
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from tools_shared_functions import (
    aggregate_wbd_hucs,
    filter_nwm_segments_by_stream_order,
    flow_data,
    get_metadata,
    get_nwm_segs,
    get_thresholds,
)

import utils.fim_logger as fl
from utils.shared_variables import VIZ_PROJECTION








In [2]:
## Testing get_metadata() functionality


# --------- Inputs --------- 


search = 5

nwm_us_search, nwm_ds_search = search, search


# output_catfim_dir = 
API_BASE_URL = 'https://nwcal-wrds.nwc.nws.noaa.gov/api/location/v3.0'
metadata_url = f'{API_BASE_URL}/metadata'


# lid_to_run = 
# nwm_metafile = 

# --------- Code --------- 

all_meta_lists = []


conus_list, ___ = get_metadata(
    metadata_url,
    select_by='nws_lid',
    selector=['all'],
    must_include='nws_data.rfc_forecast_point',
    upstream_trace_distance=nwm_us_search,
    downstream_trace_distance=nwm_ds_search,
)


# Get metadata for Islands and Alaska
islands_list, ___ = get_metadata(
    metadata_url,
    select_by='state',
    selector=['HI', 'PR', 'AK'],
    must_include=None,
    upstream_trace_distance=nwm_us_search,
    downstream_trace_distance=nwm_ds_search,
)
# Append the lists
all_meta_lists = conus_list + islands_list

# print(islands_list)

# with open(meta_file, "wb") as p_handle:
#     pickle.dump(all_meta_lists, p_handle, protocol=pickle.HIGHEST_PROTOCOL)


In [3]:
print(f'Length of conus_list: {len(conus_list)}')
print(f'Length of islands_list: {len(islands_list)}')
print(f'Length of all_meta_lists: {len(all_meta_lists)}')






Length of conus_list: 4679
Length of islands_list: 2952
Length of all_meta_lists: 7631


In [19]:
# ------ New addition: filtering ------



# -- function --
def filter_metadata_list (metadata_list, verbose):
    '''
    
    Filter metadata list to remove: 
    - sites where the nws_lid = None
    - duplicate sites
    
    '''

    unique_lids, duplicate_lids = [], []
    duplicate_metadata_list, unique_metadata_list = [], []
    
    nonelid_metadata_list = [] # TODO: remove eventually?    

    for i, site in enumerate(metadata_list):
        nws_lid = site['identifiers']['nws_lid']

        if nws_lid == None:
            # No LID available
            nonelid_metadata_list.append(site)
            
            # TODO: replace this with Continue, eventually we wont need this list
        
        elif nws_lid in unique_lids:
            # Duplicate LID
            duplicate_lids.append(nws_lid)
            duplicate_metadata_list.append(site)
            
        else: 
            # Unique/unseen LID that's not None
            unique_lids.append(nws_lid)
            unique_metadata_list.append(site)
            
    if verbose == True:
        print(f'Input metadata list length: {len(metadata_list)}')
        print(f'Output (unique) metadata list length: {len(unique_metadata_list)}')
        print(f'Number of unique LIDs: {len(unique_lids)} \nNumber of duplicate LIDs: {len(duplicate_lids)} \nNumber of None LIDs: {len(nonelid_metadata_list)}')
    
    return unique_lids, duplicate_lids, nonelid_metadata_list, duplicate_metadata_list, unique_metadata_list # TODO: eventually, have it only return necessary objects



unique_lids, duplicate_lids, nonelid_metadata_list, duplicate_metadata_list, unique_metadata_list =  filter_metadata_list(all_meta_lists, True)
print()




Input metadata list length: 7631
Output (unique) metadata list length: 7214
Number of unique LIDs: 7214 
Number of duplicate LIDs: 152 
Number of None LIDs: 265



In [26]:
state_list = ['Puerto Rico', 'Hawaii', 'Alaska']

for state in state_list: 
    print()
    print('Current Code: Single API call (only forecast points)')
    print()
    print('By-state site count:')
    currentcode_state = filter_by_state(state, conus_list, True)
    print()
    print('Proposed Update: Double API call (forecast points + all HI, AK, and PR points')
    print()
    print('By-state site count, before filtering out duplicates:')
    prefilt_state = filter_by_state(state, all_meta_lists, True)
    print()
    print('By-state site count, AFTER filtering out duplicates:')
    postfilt_state = filter_by_state(state, unique_metadata_list, True)



Current Code: Single API call (only forecast points)

By-state site count:
State: Puerto Rico 
Number of sites: 5

Proposed Update: Double API call (forecast points + all HI, AK, and PR points

By-state site count, before filtering out duplicates:
State: Puerto Rico 
Number of sites: 243

By-state site count, AFTER filtering out duplicates:
State: Puerto Rico 
Number of sites: 238

Current Code: Single API call (only forecast points)

By-state site count:
State: Hawaii 
Number of sites: 2

Proposed Update: Double API call (forecast points + all HI, AK, and PR points

By-state site count, before filtering out duplicates:
State: Hawaii 
Number of sites: 497

By-state site count, AFTER filtering out duplicates:
State: Hawaii 
Number of sites: 495

Current Code: Single API call (only forecast points)

By-state site count:
State: Alaska 
Number of sites: 145

Proposed Update: Double API call (forecast points + all HI, AK, and PR points

By-state site count, before filtering out duplicates:

In [30]:
postfilt_state = filter_by_state('Connecticut', unique_metadata_list, True)
postfilt_state = filter_by_state('New York', unique_metadata_list, True)
postfilt_state = filter_by_state('Texas', unique_metadata_list, True)

State: Connecticut 
Number of sites: 23
State: New York 
Number of sites: 142
State: Texas 
Number of sites: 380


In [9]:
## Current code formulation


unique_lids, duplicate_lids, nonelid_metadata_list, duplicate_metadata_list, unique_metadata_list =  filter_metadata_list(conus_list, True)
print()
conus_list_filt = filter_by_state('Alaska', conus_list, True)


Input metadata list length: 4679
Output (unique) metadata list length: 4679
Number of unique LIDs: 4679 
Number of duplicate LIDs: 0 
Number of None LIDs: 0
State: Alaska 
Number of sites: 145


In [112]:
# lid_list, duplicate_lid_list = list_duplicate_lids(all_meta_lists, True)

conus_list_filt = filter_by_lid(None, islands_list, True)


LID filter: None 
Number of sites: 265


In [19]:
from time import process_time

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 
Elapsed time: 16.981601495 16.980996046
Elapsed time during the whole program in seconds: 0.0006054489999982593


In [30]:
t1_start = process_time() # stopwatch
 

conus_list, ___ = get_metadata(
    metadata_url,
    select_by='nws_lid',
    selector=['all'],
    must_include='nws_data.rfc_forecast_point',
    upstream_trace_distance=nwm_us_search,
    downstream_trace_distance=nwm_ds_search,
)



print('List length:', len(conus_list))

t1_stop = process_time() # stopwatch
print('Runtime (seconds):', t1_stop-t1_start) 

List length: 4679
Runtime (seconds): 0.9184548869999993


In [91]:
# conus_list[1]

conus_list[1]['identifiers']['nws_lid']

'00RDR'

In [93]:
def list_of_lids(conus_list, verbose):
    '''
    Extract a list of LIDs from the conus_list
    '''
    lid_list = []
    for i, site in enumerate(conus_list):
        nws_lid = site['identifiers']['nws_lid']
        lid_list.append(nws_lid)
    if verbose == True:
        print(f'List of LIDs: {lid_list}')
        
    return lid_list



lid_list = list_of_lids(conus_list, True)

List of LIDs: ['00BRD', '00RDR', '1TEST', 'AABDB', 'AACLA', 'AACLS', 'AAMEE', 'AANG1', 'AARVB', 'ABAT2', 'ABBG1']


In [102]:
def list_duplicate_lids(conus_list, verbose):
    '''
    Extract a list of duplicate LIDs from the conus_list
    '''
    lid_list = []
    duplicate_lid_list = []
     
    
    for i, site in enumerate(conus_list):
        nws_lid = site['identifiers']['nws_lid']

        if nws_lid in lid_list:
            duplicate_lid_list.append(nws_lid)
        else: 
            lid_list.append(nws_lid)

    if verbose == True:
        print(f'Length of unique LID list: {len(lid_list)}')
        print(f'List of duplicate LIDs: {duplicate_lid_list}')

        
    return lid_list, duplicate_lid_list




lid_list, duplicate_lid_list = list_duplicate_lids(conus_list, True)

Length of unique LID list: 4679
List of duplicate LIDs: []


In [107]:
def filter_by_lid(lid_filter, conus_list, verbose):
    '''
    Function to filter conus_list by LID
    '''
    conus_list_filt = []
    for i, site in enumerate(conus_list):
        lid = site['identifiers']['nws_lid']
        if lid == lid_filter:
            conus_list_filt.append(site)
    if verbose == True:
        print(f'LID filter: {lid_filter} \nNumber of sites: {len(conus_list_filt)}')
        
    return conus_list_filt



conus_list_filt = filter_by_lid('None', conus_list, True)

LID filter: None 
Number of sites: 0


In [8]:
def filter_by_state(state_filter, conus_list, verbose):
    '''
    Function to filter conus_list by state
    '''
    conus_list_filt = []
    for i, site in enumerate(conus_list):
        state = site['nws_data']['state']
        if state == state_filter:
            conus_list_filt.append(site)
    if verbose == True:
        print(f'State: {state_filter} \nNumber of sites: {len(conus_list_filt)}')
        
    return conus_list_filt



conus_list_filt = filter_by_state('Alaska', conus_list, True)

State: Alaska 
Number of sites: 145


In [138]:
state_list, ___ = get_metadata(
    metadata_url,
    select_by='state',
    # selector=['HI', 'PR', 'AK'],
    selector=['AK'],

    # must_include='identifiers.nws_lid', ## ddin't work oh well
    must_include=None,

    # must_include='nws_data.rfc_forecast_point',
    upstream_trace_distance=nwm_us_search,
    downstream_trace_distance=nwm_ds_search,
)

In [139]:

unique_lids, duplicate_lids, nonelid_metadata_list, duplicate_metadata_list, unique_metadata_list =  filter_metadata_list(state_list, True)

Input metadata list length: 2047
Output (unique) metadata list length: 1950
Number of unique LIDs: 1950 
Number of duplicate LIDs: 0 
Number of None LIDs: 97


In [84]:
# filt_list = conus_list[conus_list['identifiers']['nws_lid']=='00RDR']

# type(conus_list)

# list of dictionaries of dictionaries 

# type(conus_list[1])

# conus_list[1].keys()

# conus_list[1]

In [29]:
t1_start = process_time() # stopwatch
 

conus_list, ___ = get_metadata(
    metadata_url,
    select_by='nws_lid',
    selector=['all'],
    # must_include='nws_data.rfc_forecast_point',
    upstream_trace_distance=nwm_us_search,
    downstream_trace_distance=nwm_ds_search,
)



print('List length:', len(conus_list))

t1_stop = process_time() # stopwatch
print('Runtime (seconds):', t1_stop-t1_start) 

List length: 79766
Runtime (seconds): 9.806367041999998


In [18]:
print(len(islands_list))

# islands_list[1]['identifiers']

# Get state name
# islands_list[1]['nws_data']['state']

islands_list[1]



2952


{'identifiers': {'nws_lid': '46003',
  'usgs_site_code': None,
  'nwm_feature_id': None,
  'goes_id': None,
  'env_can_gage_id': None},
 'nws_data': {'name': 'Southern Aleutians',
  'wfo': 'AFC',
  'rfc': 'APRFC',
  'geo_rfc': 'INVALID',
  'latitude': 51.851388888889,
  'longitude': -155.91583333333,
  'map_link': 'https://maps.google.com/maps?t=k&q=loc:51.851388888889+-155.91583333333',
  'horizontal_datum_name': 'NAD27',
  'state': 'Alaska',
  'county': None,
  'county_code': None,
  'huc': None,
  'hsa': 'AFC',
  'zero_datum': 0.0,
  'vertical_datum_name': None,
  'rfc_forecast_point': False,
  'rfc_defined_fcst_point': False,
  'riverpoint': True},
 'usgs_data': {'name': None,
  'geo_rfc': None,
  'latitude': None,
  'longitude': None,
  'map_link': None,
  'coord_accuracy_code': None,
  'latlon_datum_name': None,
  'coord_method_code': None,
  'state': None,
  'huc': None,
  'site_type': None,
  'altitude': None,
  'alt_accuracy_code': None,
  'alt_datum_code': None,
  'alt_method

In [None]:
# # local script calls __load_nwm_metadata so FLOG is already setup
# def __load_nwm_metadata(
#     output_catfim_dir, metadata_url, nwm_us_search, nwm_ds_search, lid_to_run, nwm_metafile
# ):

#     # FLOG.trace(metadata_url)

#     all_meta_lists = []
#     # Check to see if meta file already exists
#     # This feature means we can copy the pickle file to another enviro (AWS?) as it won't need to call
#     # WRDS unless we need a smaller or modified version. This one likely has all nws_lid data.

# #     if os.path.isfile(nwm_metafile) is True:
# #         FLOG.lprint(f"Meta file already downloaded and exists at {nwm_metafile}")

# #         with open(nwm_metafile, "rb") as p_handle:
# #             all_meta_lists = pickle.load(p_handle)

# #     else:
#         meta_file = os.path.join(output_catfim_dir, "nwm_metafile.pkl")

#         FLOG.lprint(f"Meta file will be downloaded and saved at {meta_file}")

#         # lid_to_run could be a single lid or the word "all"
#         # TODO: lid_to_run functionality... remove? for now, just hard code lid_to_run as "all"
#         if lid_to_run != "all":
#             all_meta_lists, ___ = get_metadata(
#                 metadata_url,
#                 select_by='nws_lid',
#                 selector=[lid_to_run],
#                 must_include='nws_data.rfc_forecast_point',
#                 upstream_trace_distance=nwm_us_search,
#                 downstream_trace_distance=nwm_ds_search,
#             )
#         else:
#         conus_list, ___ = get_metadata(
#             metadata_url,
#             select_by='nws_lid',
#             selector=['all'],
#             must_include='nws_data.rfc_forecast_point',
#             upstream_trace_distance=nwm_us_search,
#             downstream_trace_distance=nwm_ds_search,
#         )
#         # Get metadata for Islands and Alaska
#         islands_list, ___ = get_metadata(
#             metadata_url,
#             select_by='state',
#             selector=['HI', 'PR', 'AK'],
#             must_include=None,
#             upstream_trace_distance=nwm_us_search,
#             downstream_trace_distance=nwm_ds_search,
#         )
#         # Append the lists
#         all_meta_lists = conus_list + islands_list

#         with open(meta_file, "wb") as p_handle:
#             pickle.dump(all_meta_lists, p_handle, protocol=pickle.HIGHEST_PROTOCOL)

#     return all_meta_lists

# all_meta_lists = __load_nwm_metadata(
#     output_catfim_dir, metadata_url, nwm_us_search, nwm_ds_search, lid_to_run, nwm_metafile
# )
