## Widgets to facilitate evaluation queries

This notebook demonstrates a concise, rapid approach to subsetting the timeseries you want to evaluate and retrieving either the raw data or summary/comparison metrics.  
The approach is efficient and enables querying the data 'on the fly' within interactive visualizations.



### Install and Import packages

In [None]:
%%capture
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@main'

In [None]:
import teehr.queries.duckdb as tqd

# dashboard functions
import postevent_dashboard_utils as du
from pathlib import Path
import importlib
import panel as pn
import geopandas as gpd
import colorcet as cc
#hv.extension('bokeh', logo=False)
pn.extension()

## Point to the data that will be used for the evaluation


These are the evaluation scenario definitions - specific variables and configurations to be compared within the overall study.
We need to specify all the parquet files containing the data we want to evaluate, as well as some necessary associated data (geometry, crosswalks, and attributes).
These files dictate the specific study (directory name), forecast configuration, and source of verifying data used in this evaluation.



In [None]:
# Root directory that holds every file used by this study.
STUDY_DIR = Path("/home") / "jovyan" / "shared" / "rti-eval" / "post-event-example"

# Unit system shown in the visualizations: 'english' or 'metric'.
viz_units = "metric"

# Medium range streamflow scenario: primary = "usgs" timeseries,
# secondary = "medium_range_mem1" timeseries.
MRF_streamflow = {
    "scenario_name": "medium_range",
    "variable": "streamflow",
    "primary_filepath": STUDY_DIR / "timeseries" / "usgs" / "*.parquet",
    "secondary_filepath": STUDY_DIR / "timeseries" / "medium_range_mem1" / "*.parquet",
    "crosswalk_filepath": STUDY_DIR / "geo" / "usgs_nwm22_crosswalk.parquet",
    "geometry_filepath": STUDY_DIR / "geo" / "usgs_geometry.parquet",
}

# Medium range precipitation scenario. Primary and secondary are both HUC10,
# hence the huc10-to-huc10 crosswalk.
MRF_forcing = {
    "scenario_name": "medium_range",
    "variable": "precipitation",
    "primary_filepath": STUDY_DIR / "timeseries" / "forcing_analysis_assim" / "*.parquet",
    "secondary_filepath": STUDY_DIR / "timeseries" / "forcing_medium_range" / "*.parquet",
    "crosswalk_filepath": STUDY_DIR / "geo" / "huc10_huc10_crosswalk.parquet",
    "geometry_filepath": STUDY_DIR / "geo" / "huc10_geometry.parquet",
}

# Short range streamflow scenario: identical to the medium range streamflow
# scenario except for the scenario name and the secondary (forecast) files.
SRF_streamflow = {
    **MRF_streamflow,
    "scenario_name": "short_range",
    "secondary_filepath": STUDY_DIR / "timeseries" / "short_range" / "*.parquet",
}

# Short range precipitation scenario: identical to the medium range
# precipitation scenario except for the scenario name and the secondary files.
SRF_forcing = {
    **MRF_forcing,
    "scenario_name": "short_range",
    "secondary_filepath": STUDY_DIR / "timeseries" / "forcing_short_range" / "*.parquet",
}

# Every scenario offered for selection in this notebook.
scenario_definitions = [MRF_streamflow, MRF_forcing, SRF_streamflow, SRF_forcing]

# Attribute tables stored alongside the geometry files in the "geo" directory.
attribute_paths = {
    "usgs_upstream_area": STUDY_DIR / "geo" / "usgs_attr_upstream_area.parquet",
    "usgs_ecoregions": STUDY_DIR / "geo" / "usgs_attr_ecoregions.parquet",
    "usgs_stream_order": STUDY_DIR / "geo" / "usgs_attr_stream_order.parquet",
    "usgs_huc_crosswalk": STUDY_DIR / "geo" / "usgs_huc12_crosswalk.parquet",
}

## Select the scenario and variable for evaluation:
We will use some panel widgets to make this easier:

In [None]:
# Pick up any edits made to the dashboard utilities since the last run.
importlib.reload(du)

# Choices offered by the two widgets, taken from the scenario definitions.
available_scenario_names = sorted(du.get_scenario_names(scenario_definitions))
available_variables = du.get_scenario_variables(scenario_definitions)

scenario_selector = du.get_scenario_selector(scenario_name_list=available_scenario_names)
variable_selector = du.get_variable_selector(variable_list=available_variables)

# Last expression of the cell: shows both selectors side by side.
pn.Row(scenario_selector, variable_selector)

## Filter the data to the region, time period, stream size, threshold (etc.) of interest:

In [None]:
# Pick up any edits made to the dashboard utilities since the last run.
importlib.reload(du)

# Look up the scenario matching the current widget selections. The candidates
# are the `scenario_definitions` list built earlier in this notebook; the
# previous name `eval_scenarios` is never defined and raised a NameError on a
# fresh kernel.
scenario = du.get_scenario(
    scenario_definitions, scenario_selector.value, variable_selector.value
)

# Build the filter widgets for the selected scenario.
(
    value_time_slider,
    reference_time_slider,
    lead_time_selector,
    huc2_selector,
    threshold_selector,
    order_limit_selector,
    metric_selector,
) = du.get_filter_widgets(scenario)

# Last expression of the cell: selectors in the left column, time-related
# widgets in the right column.
pn.Row(
    pn.Column(huc2_selector, order_limit_selector, threshold_selector, metric_selector),
    pn.Spacer(width=50),
    pn.Column(
        pn.Spacer(height=10), value_time_slider,
        pn.Spacer(height=10), reference_time_slider,
        pn.Spacer(height=5), lead_time_selector,
    )
)

## Make selections above and run the query in the cell below
### Experiment with the filter selections...

In [None]:
%%time
# Pick up any edits made to the dashboard utilities since the last run.
importlib.reload(du)

# Element [1] of each slider widget is the object exposing value_start / value_end.
value_range = value_time_slider[1]
reference_range = reference_time_slider[1]

# Metrics are grouped and ordered by location and reference time.
location_and_reference = ['primary_location_id', 'reference_time']

# Query arguments, read from the current widget selections.
metrics_query_args = dict(
    query_type="metrics",
    scenario=scenario,
    huc_id=huc2_selector.value,
    order_limit=order_limit_selector.value,
    value_time_start=value_range.value_start,
    value_time_end=value_range.value_end,
    reference_time_start=reference_range.value_start,
    reference_time_end=reference_range.value_end,
    group_by=list(location_and_reference),
    order_by=list(location_and_reference),
    value_min=threshold_selector.value,
    include_metrics=metric_selector.value,
    attribute_paths=attribute_paths,
    return_query=False,
)
metrics_gdf = du.run_teehr_query(**metrics_query_args)

# Preview the first rows of the result.
display(metrics_gdf.head())

In [None]:
# Pick up any edits made to the dashboard utilities since the last run.
importlib.reload(du)

# Element [1] of each slider widget is the object exposing value_start / value_end.
value_range = value_time_slider[1]
reference_range = reference_time_slider[1]

# Query arguments, read from the current widget selections.
timeseries_query_args = dict(
    query_type="timeseries",
    scenario=scenario,
    huc_id=huc2_selector.value,
    order_limit=order_limit_selector.value,
    value_time_start=value_range.value_start,
    value_time_end=value_range.value_end,
    reference_time_start=reference_range.value_start,
    reference_time_end=reference_range.value_end,
    value_min=threshold_selector.value,
    attribute_paths=attribute_paths,
    return_query=False,
)
ts_df = du.run_teehr_query(**timeseries_query_args)

# Preview the first rows of the result.
display(ts_df.head())

In [None]:
import hvplot
import hvplot.pandas
import holoviews as hv
from holoviews.element import tiles

In [None]:
# Unit system requested for the visualization.
units = viz_units

# Convert the metrics query result to the visualization units for the
# scenario's variable.
converted_gdf = du.convert_query_to_viz_units(metrics_gdf, units, scenario['variable'])

# Combine the attribute tables and attach them to the converted metrics.
attribute_df = du.combine_attributes(attribute_paths, units)
gdf = du.merge_attr_to_gdf(converted_gdf, attribute_df)

# Difference in maximum value expressed as a percentage of the primary maximum.
peak_ratio = gdf['max_value_delta'] / gdf['primary_maximum']
gdf['max_perc_diff'] = peak_ratio * 100

In [None]:
# Columns carried into the plain table used for plotting.
plot_columns = [
    'primary_location_id',
    'reference_time',
    'primary_maximum',
    'secondary_maximum',
    'max_perc_diff',
    'upstream_area_value',
    'ecoregion_L2_value',
    'stream_order_value',
]
df = gdf[plot_columns].copy()

# Reproject to EPSG:3857 once and reuse the result for both coordinates;
# calling to_crs separately for x and y reprojected the whole frame twice.
projected_geometry = gdf.to_crs("EPSG:3857").geometry
df['easting'] = projected_geometry.x
df['northing'] = projected_geometry.y

# Optional filter, left disabled.
# NOTE(review): the first two conditions are identical; the second was
# presumably meant to be df['secondary_maximum']<5000 - confirm before enabling.
#df = df.loc[(df['primary_maximum']<5000) & (df['primary_maximum']<5000) & \
#            (df['secondary_maximum']>0) & (df['primary_maximum']>0)]

In [None]:
# Show the plotting table as the cell output for a quick visual check.
df

In [None]:
# Column that drives the point colors and the histogram.
measure = 'max_perc_diff'
width = 700

# Extra columns attached to each map point.
point_vdims = [
    measure,
    'secondary_maximum',
    'primary_maximum',
    'upstream_area_value',
    'ecoregion_L2_value',
    'stream_order_value',
]

# OSM tile basemap with its axes renamed to match the point coordinates.
basemap = tiles.OSM().redim(x='easting', y='northing') #gv.tile_sources.CartoLight

# Map of locations colored by the measure, with the color range fixed to +/-100.
points_hv = hv.Points(df, kdims=['easting', 'northing'], vdims=point_vdims).opts(
    width=width,
    height=400,
    color=hv.dim(measure),
    clim=(-100, 100),
    cmap=cc.CET_D1A[::-1],
    size=5,
    xaxis=None,
    yaxis=None,
    colorbar=True,
)

# Histogram of the measure.
diff_hist = df.hvplot.hist(
    y=measure,
    width=width,
    bins=100,
    bin_range=(-100, 1000),
    height=200,
    xlabel='% Difference Peak Flow',
)

# Scatter of secondary (x axis) against primary (y axis) maximum values.
# Alternative hvplot version, kept for reference:
#diff_scat = df.hvplot.scatter(x='secondary_average', y='primary_average', vdims=[measure,'easting','northing'], alpha=0.2, width=400, height=400, xlabel='Forecast Peak', ylabel='Observed Peak')
diff_scat = hv.Scatter(
    df,
    kdims=['secondary_maximum'],
    vdims=['primary_maximum', 'easting', 'northing', measure],
).opts(alpha=0.2, width=400, height=400, xlabel='Forecast Peak', ylabel='Observed Peak')

# Link selections across the three plots and lay them out in two columns.
ls = hv.link_selections.instance()
ls((basemap*points_hv + diff_scat + diff_hist)).cols(2)