In [3]:
import teehr.queries.duckdb as tqd

import dashboard_utils as du
import importlib
from datetime import datetime, timedelta
from pathlib import Path
import pandas as pd
import geopandas as gpd

In [4]:
# Root directory of the evaluation study; every input file below lives under it.
STUDY_DIR = Path("/home/jovyan/shared/rti-eval/post-event-example")

# Input files for the medium range streamflow forecast evaluation:
# primary and secondary timeseries (glob patterns), the crosswalk between
# them, and the location geometry.
scenario = {
    "scenario_name": "medium_range",
    "variable": "streamflow",
    "primary_filepath": STUDY_DIR / "timeseries" / "usgs" / "*.parquet",
    "secondary_filepath": STUDY_DIR / "timeseries" / "medium_range_mem1" / "*.parquet",
    "crosswalk_filepath": STUDY_DIR / "geo" / "usgs_nwm22_crosswalk.parquet",
    "geometry_filepath": STUDY_DIR / "geo" / "usgs_geometry.parquet",
}

# Attribute files, keyed by attribute name; passed to the dashboard_utils
# location lookup further down.
attribute_paths = {
    "usgs_upstream_area": STUDY_DIR / "geo" / "usgs_attr_upstream_area.parquet",
    "usgs_ecoregions": STUDY_DIR / "geo" / "usgs_attr_ecoregions.parquet",
    "usgs_stream_order": STUDY_DIR / "geo" / "usgs_attr_stream_order.parquet",
    "usgs_huc_crosswalk": STUDY_DIR / "geo" / "usgs_huc12_crosswalk.parquet",
}



In [11]:
# Locations to evaluate: USGS locations within HUC level 2, id '18',
# sorted in place before being used in the location filter.
location_list = du.get_usgs_locations_within_huc(
    huc_level=2,
    huc_id='18',
    attribute_paths=attribute_paths,
)
location_list.sort()

# Two filters: keep only the selected locations, and only the forecast
# with reference time 2023-01-01 12:00 (passed as its string form).
filters = [
    {
        "column": "primary_location_id",
        "operator": "in",
        "value": location_list,
    },
    {
        "column": "reference_time",
        "operator": "=",
        "value": f"{datetime(2023, 1, 1, 12)}",
    },
]

# Query max_value_delta per (location, reference time), with geometry
# attached and ordering by location id requested from the query itself.
gdf = tqd.get_metrics(
    primary_filepath=scenario["primary_filepath"],
    secondary_filepath=scenario["secondary_filepath"],
    crosswalk_filepath=scenario["crosswalk_filepath"],
    geometry_filepath=scenario["geometry_filepath"],
    group_by=['primary_location_id', 'reference_time'],
    order_by=['primary_location_id'],
    filters=filters,
    include_metrics=['max_value_delta'],
    include_geometry=True,
    return_query=False,
)

# Report the result length, then a fixed positional window (rows 220-229,
# first three columns) so the returned ordering can be inspected.
print(len(gdf))
print(gdf.iloc[220:230, :3])

346
    primary_location_id      reference_time  max_value_delta
220       usgs-11413000 2023-01-01 12:00:00        -6.409676
221       usgs-11418500 2023-01-01 12:00:00         6.475594
222       usgs-11421000 2023-01-01 12:00:00       -46.796204
223       usgs-11424000 2023-01-01 12:00:00      -183.797226
224       usgs-11425500 2023-01-01 12:00:00        48.188599
225       usgs-11427000 2023-01-01 12:00:00       169.824768
226       usgs-11446500 2023-01-01 12:00:00      -805.538605
227       usgs-11447360 2023-01-01 12:00:00         0.537109
228       usgs-11447650 2023-01-01 12:00:00      -198.369629
229       usgs-11447830 2023-01-01 12:00:00      -322.569977


In [12]:
# Re-sort the metrics by location id and print the same positional window
# (rows 220-229, first three columns) as the previous cell. The original
# index labels are kept, so they show where each row sat in the result
# returned by the query.
order_by = ['primary_location_id']
gdf = gdf.sort_values(by=order_by)
print(gdf.iloc[220:230, :3])

    primary_location_id      reference_time  max_value_delta
203       usgs-11342000 2023-01-01 12:00:00      -217.577515
204       usgs-11345500 2023-01-01 12:00:00     28319.708359
205       usgs-11348500 2023-01-01 12:00:00       -29.589226
206       usgs-11355010 2023-01-01 12:00:00       -60.691051
207       usgs-11355500 2023-01-01 12:00:00        -0.221190
208       usgs-11370500 2023-01-01 12:00:00        68.822601
209       usgs-11372000 2023-01-01 12:00:00       -36.240555
335       usgs-11374000 2023-01-01 12:00:00       439.767395
210       usgs-11376000 2023-01-01 12:00:00      -535.028915
211       usgs-11376550 2023-01-01 12:00:00        40.877945
