## Widgets to facilitate evaluation queries

This notebook provides widgets that offer a concise, rapid way to subset the timeseries you want to evaluate and to retrieve either the raw data or summary/comparison metrics.  
The approach is efficient and enables querying the data 'on the fly' within interactive visualizations.



### Install and Import packages

In [None]:
%%capture
!pip install spatialpandas colormap colorcet duckdb
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@main'
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@39d6627e4f49b0bdeab3a4c4e8837e6ce5a15f78'

In [3]:
import importlib
from pathlib import Path

import panel as pn
import teehr.queries.duckdb as tqd

# dashboard functions
import postevent_dashboard_utils as du

## Point to the data that will be used for the evaluation


These are the evaluation scenario definitions - specific variables and configurations to be compared within the overall study.
We need to specify all the parquet files containing the data we want to evaluate, as well as some necessary associated data (geometry, crosswalks, and attributes).
These files dictate the specific study (directory name), forecast configuration, and source of verifying data used in this evaluation.



In [8]:
# Root directory of the study; every input file referenced below lives underneath it.
STUDY_DIR = Path("/home", "jovyan", "shared", "rti-eval", "post-event-example")
TIMESERIES_DIR = STUDY_DIR / "timeseries"  # one sub-directory of parquet files per dataset
GEO_DIR = STUDY_DIR / "geo"                # geometry, crosswalk and attribute tables


def _timeseries_glob(dataset):
    """Return the glob pattern matching every parquet file of one timeseries dataset."""
    return TIMESERIES_DIR / dataset / "*.parquet"


# Each scenario is a dict with the same six keys:
#   scenario_name / variable        -> what is being evaluated
#   primary_filepath                -> timeseries used as the primary side of the comparison
#   secondary_filepath              -> timeseries compared against the primary
#   crosswalk_filepath              -> table linking primary and secondary location ids
#   geometry_filepath               -> geometry of the primary locations

# medium range streamflow forecast evaluation files
MRF_streamflow = dict(
    scenario_name="medium_range",
    variable="streamflow",
    primary_filepath=_timeseries_glob("usgs"),
    secondary_filepath=_timeseries_glob("medium_range_mem1"),
    crosswalk_filepath=GEO_DIR / "usgs_nwm22_crosswalk.parquet",
    geometry_filepath=GEO_DIR / "usgs_geometry.parquet",
)

# medium range precip forecast evaluation files
MRF_forcing = dict(
    scenario_name="medium_range",
    variable="precipitation",
    primary_filepath=_timeseries_glob("forcing_analysis_assim"),
    secondary_filepath=_timeseries_glob("forcing_medium_range"),
    # the primary and secondary are both HUC10
    crosswalk_filepath=GEO_DIR / "huc10_huc10_crosswalk.parquet",
    geometry_filepath=GEO_DIR / "huc10_geometry.parquet",
)

# short range streamflow forecast evaluation files
# (same observations, crosswalk and geometry as the medium range streamflow scenario)
SRF_streamflow = dict(
    scenario_name="short_range",
    variable="streamflow",
    primary_filepath=MRF_streamflow["primary_filepath"],
    secondary_filepath=_timeseries_glob("short_range"),
    crosswalk_filepath=MRF_streamflow["crosswalk_filepath"],
    geometry_filepath=MRF_streamflow["geometry_filepath"],
)

# short range precip forecast evaluation files
# (same primary data, crosswalk and geometry as the medium range precip scenario)
SRF_forcing = dict(
    scenario_name="short_range",
    variable="precipitation",
    primary_filepath=MRF_forcing["primary_filepath"],
    secondary_filepath=_timeseries_glob("forcing_short_range"),
    crosswalk_filepath=MRF_forcing["crosswalk_filepath"],
    geometry_filepath=MRF_forcing["geometry_filepath"],
)

# All scenarios offered by the selector widgets.
eval_scenarios = [MRF_streamflow, MRF_forcing, SRF_streamflow, SRF_forcing]

# Extra location attribute / crosswalk tables passed along with the queries.
attribute_paths = dict(
    usgs_upstream_area=GEO_DIR / "usgs_attr_upstream_area.parquet",
    usgs_ecoregions=GEO_DIR / "usgs_attr_ecoregions.parquet",
    usgs_stream_order=GEO_DIR / "usgs_attr_stream_order.parquet",
    usgs_huc_crosswalk=GEO_DIR / "usgs_huc12_crosswalk.parquet",
    nwm22_huc_crosswalk=GEO_DIR / "nwm22_huc12_crosswalk.parquet",
    # TODO: upstream imperviousness (GEO_DIR / "usgs_attr_upstream_imperv.parquet") -- don't have this data yet
)

## Select the scenario and variable for evaluation:
We will use some panel widgets to make this easier:

In [9]:
# Re-import the dashboard helpers so edits made to the module are picked up.
importlib.reload(du)

# Scenario picker: offers the scenario names from `eval_scenarios`, sorted alphabetically.
scenario_names = sorted(du.get_scenario_names(eval_scenarios))
scenario_selector = du.get_scenario_selector(scenario_name_list=scenario_names)

# Variable picker: offers whatever `du.get_scenario_variables` returns for `eval_scenarios`.
scenario_variables = du.get_scenario_variables(eval_scenarios)
variable_selector = du.get_variable_selector(variable_list=scenario_variables)

# Display both pickers side by side.
pn.Row(scenario_selector, variable_selector)

## Filter the data to the region, time period, stream size, threshold (etc.) of interest:

In [10]:
# Re-import the dashboard helpers so edits made to the module are picked up.
importlib.reload(du)

# Resolve the scenario definition matching the current widget selections.
scenario = du.get_scenario(
    eval_scenarios,
    scenario_selector.value,
    variable_selector.value,
)

# Build the filter widgets for that scenario (seven are returned, in this order).
(
    value_time_slider,
    reference_time_slider,
    lead_time_selector,
    huc2_selector,
    threshold_selector,
    order_limit_selector,
    metric_selector,
) = du.get_filter_widgets(scenario)

# Layout: location/threshold/metric pickers on the left, time filters on the right,
# separated by a fixed-width gap.
pn.Row(
    pn.Column(
        huc2_selector,
        order_limit_selector,
        threshold_selector,
        metric_selector,
    ),
    pn.Spacer(width=50),
    pn.Column(
        pn.Spacer(height=10),
        value_time_slider,
        pn.Spacer(height=10),
        reference_time_slider,
        pn.Spacer(height=5),
        lead_time_selector,
    ),
)

Checking value_time range in the parquet files
Checking reference_time range in the parquet files


## Make selections above and run the query in the cell below
### Experiment with the filter selections...

In [13]:
%%time
importlib.reload(du)
metrics_gdf = du.run_teehr_query(
    query_type="metrics",
    scenario=scenario,
    huc_id=huc2_selector.value,
    order_limit=order_limit_selector.value,
    value_time_start=value_time_slider[1].value_start,    
    value_time_end=value_time_slider[1].value_end,    
    reference_time_start=reference_time_slider[1].value_start,    
    reference_time_end=reference_time_slider[1].value_end,
    group_by=['primary_location_id','reference_time'],
    value_min=threshold_selector.value,    
    include_metrics=metric_selector.value,
    attribute_paths=attribute_paths,
    return_query=False,
)
display(metrics_gdf)

FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,measurement_unit,primary_location_id,reference_time,bias,geometry
0,m3/s,usgs-10251290,2023-01-08 06:00:00,0.005012,POINT (-116.23425 35.88569)
23,m3/s,usgs-10251290,2023-01-02 12:00:00,0.004625,POINT (-116.23425 35.88569)
24,m3/s,usgs-10251290,2023-01-01 18:00:00,0.004165,POINT (-116.23425 35.88569)
25,m3/s,usgs-10251290,2023-01-05 18:00:00,0.004853,POINT (-116.23425 35.88569)
26,m3/s,usgs-10251290,2023-01-02 06:00:00,0.004611,POINT (-116.23425 35.88569)
...,...,...,...,...,...
13781,m3/s,usgs-362727116013501,2023-01-04 06:00:00,0.000848,POINT (-116.02725 36.45746)
13782,m3/s,usgs-362727116013501,2023-01-03 12:00:00,0.000846,POINT (-116.02725 36.45746)
13783,m3/s,usgs-362727116013501,2023-01-10 12:00:00,0.000850,POINT (-116.02725 36.45746)
13775,m3/s,usgs-362727116013501,2023-01-06 12:00:00,0.000848,POINT (-116.02725 36.45746)


CPU times: user 20 s, sys: 238 ms, total: 20.2 s
Wall time: 2.95 s


In [14]:
# Re-import the dashboard helpers so edits made to the module are picked up.
importlib.reload(du)

# Element 1 of each slider object carries the selected start/end values.
# NOTE(review): presumably element 0 is a label/title -- confirm in du.get_filter_widgets.
value_time_range = value_time_slider[1]
reference_time_range = reference_time_slider[1]

# Fetch the timeseries for the same filter selections (no grouping or metric list here).
ts_df = du.run_teehr_query(
    query_type="timeseries",
    scenario=scenario,
    huc_id=huc2_selector.value,
    order_limit=order_limit_selector.value,
    value_time_start=value_time_range.value_start,
    value_time_end=value_time_range.value_end,
    reference_time_start=reference_time_range.value_start,
    reference_time_end=reference_time_range.value_end,
    value_min=threshold_selector.value,
    attribute_paths=attribute_paths,
    return_query=False,
)
display(ts_df)

FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

Unnamed: 0,reference_time,value_time,secondary_location_id,secondary_value,configuration,measurement_unit,variable_name,primary_value,primary_location_id,lead_time,geometry
0,2023-01-01,2023-01-01 01:00:00,nwm22-20247214,0.0,medium_range_mem1,m3/s,streamflow,0.004248,usgs-10251290,0 days 01:00:00,POINT (-116.23425 35.88569)
1,2023-01-01,2023-01-01 02:00:00,nwm22-20247214,0.0,medium_range_mem1,m3/s,streamflow,0.004248,usgs-10251290,0 days 02:00:00,POINT (-116.23425 35.88569)
2,2023-01-01,2023-01-01 03:00:00,nwm22-20247214,0.0,medium_range_mem1,m3/s,streamflow,0.004248,usgs-10251290,0 days 03:00:00,POINT (-116.23425 35.88569)
3,2023-01-01,2023-01-01 04:00:00,nwm22-20247214,0.0,medium_range_mem1,m3/s,streamflow,0.004248,usgs-10251290,0 days 04:00:00,POINT (-116.23425 35.88569)
4,2023-01-01,2023-01-01 05:00:00,nwm22-20247214,0.0,medium_range_mem1,m3/s,streamflow,0.004248,usgs-10251290,0 days 05:00:00,POINT (-116.23425 35.88569)
...,...,...,...,...,...,...,...,...,...,...,...
3219386,2023-01-11,2023-01-20 20:00:00,nwm22-20245424,0.0,medium_range_mem1,m3/s,streamflow,0.000850,usgs-362727116013501,9 days 20:00:00,POINT (-116.02725 36.45746)
3219387,2023-01-11,2023-01-20 21:00:00,nwm22-20245424,0.0,medium_range_mem1,m3/s,streamflow,0.000850,usgs-362727116013501,9 days 21:00:00,POINT (-116.02725 36.45746)
3219388,2023-01-11,2023-01-20 22:00:00,nwm22-20245424,0.0,medium_range_mem1,m3/s,streamflow,0.000850,usgs-362727116013501,9 days 22:00:00,POINT (-116.02725 36.45746)
3219389,2023-01-11,2023-01-21 00:00:00,nwm22-20245424,0.0,medium_range_mem1,m3/s,streamflow,0.000850,usgs-362727116013501,10 days 00:00:00,POINT (-116.02725 36.45746)
