## Widgets to facilitate evaluation queries

These widgets offer a concise, rapid way to subset the timeseries you want to evaluate and to retrieve either the raw data or summary/comparison metrics.  
The approach is efficient and enables querying the data 'on the fly' within interactive visualizations.



### Install and Import packages

In [None]:
%%capture
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@main'

In [None]:
import teehr.queries.duckdb as tqd

# dashboard functions
import postevent_dashboard_utils as du
from pathlib import Path
import importlib
import pandas as pd
import numpy as np
import geopandas as gpd
import panel as pn
import colorcet as cc
import hvplot
import hvplot.pandas
#hv.extension('bokeh', logo=False)
pn.extension()

## Point to the data that will be used for the evaluation


These are the evaluation scenario definitions - specific variables and configurations to be compared within the overall study.
We need to specify all the parquet files containing the data we want to evaluate, as well as some necessary associated data (geometry, crosswalks, and attributes).
These files dictate the specific study (directory name), forecast configuration, and source of verifying data used in this evaluation.



In [None]:
# root directory of the study; every input file lives beneath it
STUDY_DIR = Path("/home", "jovyan", "shared", "rti-eval", "post-event-example")

# unit system shown in the visualizations: 'english' or 'metric'
viz_units = "metric"

# sub-directories shared by all scenarios
timeseries_dir = STUDY_DIR / "timeseries"
geo_dir = STUDY_DIR / "geo"

# medium range streamflow forecasts, verified against USGS timeseries
MRF_streamflow = {
    "scenario_name": "medium_range",
    "variable": "streamflow",
    "primary_filepath": timeseries_dir / "usgs" / "*.parquet",
    "secondary_filepath": timeseries_dir / "medium_range_mem1" / "*.parquet",
    "crosswalk_filepath": geo_dir / "usgs_nwm22_crosswalk.parquet",
    "geometry_filepath": geo_dir / "usgs_geometry.parquet",
}

# medium range precipitation forecasts, verified against the analysis/assim forcing
MRF_forcing = {
    "scenario_name": "medium_range",
    "variable": "precipitation",
    "primary_filepath": timeseries_dir / "forcing_analysis_assim" / "*.parquet",
    "secondary_filepath": timeseries_dir / "forcing_medium_range" / "*.parquet",
    # the primary and secondary are both HUC10
    "crosswalk_filepath": geo_dir / "huc10_huc10_crosswalk.parquet",
    "geometry_filepath": geo_dir / "huc10_geometry.parquet",
}

# short range streamflow forecasts; reuses the medium range primary, crosswalk and geometry files
SRF_streamflow = {
    "scenario_name": "short_range",
    "variable": "streamflow",
    "primary_filepath": MRF_streamflow["primary_filepath"],
    "secondary_filepath": timeseries_dir / "short_range" / "*.parquet",
    "crosswalk_filepath": MRF_streamflow["crosswalk_filepath"],
    "geometry_filepath": MRF_streamflow["geometry_filepath"],
}

# short range precipitation forecasts; reuses the medium range primary, crosswalk and geometry files
SRF_forcing = {
    "scenario_name": "short_range",
    "variable": "precipitation",
    "primary_filepath": MRF_forcing["primary_filepath"],
    "secondary_filepath": timeseries_dir / "forcing_short_range" / "*.parquet",
    "crosswalk_filepath": MRF_forcing["crosswalk_filepath"],
    "geometry_filepath": MRF_forcing["geometry_filepath"],
}

# every scenario/variable combination available to the selectors below
scenario_definitions = [MRF_streamflow, MRF_forcing, SRF_streamflow, SRF_forcing]

# attribute tables for the USGS locations, keyed by a short name
attribute_paths = {
    "usgs_upstream_area": geo_dir / "usgs_attr_upstream_area.parquet",
    "usgs_ecoregions": geo_dir / "usgs_attr_ecoregions.parquet",
    "usgs_stream_order": geo_dir / "usgs_attr_stream_order.parquet",
    "usgs_huc_crosswalk": geo_dir / "usgs_huc12_crosswalk.parquet",
}

## Select the scenario and variable for evaluation:
We will use some panel widgets to make this easier:

In [None]:
# re-import the dashboard utilities so edits to the module take effect without a kernel restart
importlib.reload(du)

# scenario selector: the scenario names found in the definitions, sorted
scenario_names = sorted(du.get_scenario_names(scenario_definitions))
scenario_selector = du.get_scenario_selector(scenario_name_list=scenario_names)

# variable selector: the variables found in the definitions, in the order returned
scenario_variables = du.get_scenario_variables(scenario_definitions)
variable_selector = du.get_variable_selector(variable_list=scenario_variables)

# show both selectors side by side
pn.Row(scenario_selector, variable_selector)

## Filter the data to the region, time period, stream size, threshold (etc.) of interest:

In [None]:
# re-import the dashboard utilities so edits to the module take effect without a kernel restart
importlib.reload(du)

# look up the scenario matching the current selector values
scenario = du.get_scenario(
    scenario_definitions,
    scenario_selector.value,
    variable_selector.value,
)

# get_filter_widgets hands back seven widgets, unpacked here in the order returned
(
    value_time_slider,
    reference_time_slider,
    lead_time_selector,
    huc2_selector,
    threshold_selector,
    order_limit_selector,
    metric_selector,
) = du.get_filter_widgets(scenario)

# left column: region, stream order, threshold and metric choices
attribute_filters = pn.Column(
    huc2_selector,
    order_limit_selector,
    threshold_selector,
    metric_selector,
)

# right column: time-related filters, each preceded by a small vertical gap
time_filters = pn.Column(
    pn.Spacer(height=10), value_time_slider,
    pn.Spacer(height=10), reference_time_slider,
    pn.Spacer(height=5), lead_time_selector,
)

pn.Row(attribute_filters, pn.Spacer(width=50), time_filters)

## Make selections above and run the query in the cell below
### Experiment with the filter selections...

In [None]:
%%time
# re-import the dashboard utilities so edits to the module take effect without a kernel restart
importlib.reload(du)

# gather every query argument from the current widget selections;
# index 1 of each slider object is the element exposing value_start / value_end
metrics_query_args = {
    "query_type": "metrics",
    "scenario": scenario,
    "huc_id": huc2_selector.value,
    "order_limit": order_limit_selector.value,
    "value_time_start": value_time_slider[1].value_start,
    "value_time_end": value_time_slider[1].value_end,
    "reference_time_start": reference_time_slider[1].value_start,
    "reference_time_end": reference_time_slider[1].value_end,
    # one row of metrics per primary location
    "group_by": ['primary_location_id'],
    "order_by": ['primary_location_id'],
    "value_min": threshold_selector.value,
    "include_metrics": metric_selector.value,
    "attribute_paths": attribute_paths,
    "return_query": False,
}

gdf = du.run_teehr_query(**metrics_query_args)
display(gdf.head())

In [None]:
# reproject to EPSG:3857 before plotting over the map tiles
gdf = gdf.to_crs("EPSG:3857")

# peak error relative to the primary maximum; needs the 'max_value_delta' and
# 'primary_maximum' columns, so those metrics must have been included in the query above
gdf['relative_peak_error'] = gdf['max_value_delta'] / gdf['primary_maximum']

# plot options: reversed diverging colormap, color range fixed at (-1, 1)
peak_error_plot_opts = dict(
    c='relative_peak_error',
    cmap=cc.CET_D1A[::-1],
    clim=(-1, 1),
    width=500,
    height=400,
    title="Relative Peak Error",
    size=10,
    xaxis=None,
    yaxis=None,
    tiles='CartoLight',
    hover_cols=['primary_location_id', 'primary_maximum'],
    cnorm='linear',
)
gdf.hvplot.points(**peak_error_plot_opts)

### Choose a gage to explore more closely

In [None]:
# location id of the gage examined in the cells below
usgs_id = "usgs-11451715"

### Get the observed and forecast streamflow time series

In [None]:
# re-import the dashboard utilities so edits to the module take effect without a kernel restart
importlib.reload(du)

# index 1 of each slider object is the element exposing value_start / value_end
value_window = value_time_slider[1]
reference_window = reference_time_slider[1]

# same filters as the metrics query, but restricted to the chosen gage,
# returning the timeseries themselves and no geometry
flow_df = du.run_teehr_query(
    query_type="timeseries",
    scenario=scenario,
    location_id=usgs_id,
    order_limit=order_limit_selector.value,
    value_time_start=value_window.value_start,
    value_time_end=value_window.value_end,
    reference_time_start=reference_window.value_start,
    reference_time_end=reference_window.value_end,
    value_min=threshold_selector.value,
    attribute_paths=attribute_paths,
    return_query=False,
    include_geometry=False,
)
display(flow_df.head())

### Plot all the timeseries

In [None]:
# plot-friendly column names: primary = observed, secondary = forecast
flow_df = flow_df.rename(columns={'primary_value':'obs_flow','secondary_value':'fcst_flow'})

# one forecast trace per reference time, colored along the reversed rainbow colormap
ref_times = sorted(flow_df['reference_time'].unique())
cmap = cc.rainbow[::-1]
# step through the colormap so the forecasts spread over its range; the max() guard
# avoids a ZeroDivisionError when the query returned no rows (no reference times)
cstep = len(cmap) // max(len(ref_times), 1)

flow_obs = flow_df.hvplot(x='value_time', y='obs_flow', label = 'observed', ylabel='Flow (cms)', color = 'black', line_width=4)
hydrographs = flow_obs
for t, ref_time in enumerate(ref_times):
    # rows belonging to the forecast with this reference time
    df_t = flow_df[flow_df['reference_time'] == ref_time]
    ci = cmap[cstep * t]
    fcst = df_t.hvplot(x='value_time', y='fcst_flow', color = [ci])
    hydrographs = hydrographs * fcst

# overlay the observations once more, presumably so they are drawn on top of the forecasts
hydrographs * flow_obs

### Get observed and forecast precipitation time series in the area

In [None]:
# read the usgs-huc crosswalk, get the HUC10 containing the above gage
cross = pd.read_parquet(attribute_paths['usgs_huc_crosswalk'])
gage_hucs = cross.loc[cross['primary_location_id']==usgs_id, 'secondary_location_id']
if gage_hucs.empty:
    # fail with a clear message instead of an opaque IndexError from .iloc[0]
    raise ValueError(
        f"{usgs_id} was not found in the crosswalk {attribute_paths['usgs_huc_crosswalk']}"
    )
huc12_id = gage_hucs.iloc[0]
# the HUC10 code is the first 10 digits of the HUC12 code that follows the "-" prefix separator
huc10_id = "-".join(['huc10', huc12_id.split("-")[1][:10]])

# use the precipitation scenario that matches the selected forecast configuration,
# so a short range streamflow selection is paired with short range forcing
# NOTE(review): assumes du.get_scenario returns the matching scenario definition,
# as it is used for `scenario` in the filter cell - confirm
forcing_scenario = du.get_scenario(scenario_definitions, scenario_selector.value, "precipitation")

pcp_df = du.run_teehr_query(
    query_type="timeseries",
    scenario=forcing_scenario,
    location_id=huc10_id,
    order_limit=order_limit_selector.value,
    value_time_start=value_time_slider[1].value_start,    
    value_time_end=value_time_slider[1].value_end,    
    reference_time_start=reference_time_slider[1].value_start,    
    reference_time_end=reference_time_slider[1].value_end,
    value_min=threshold_selector.value,    
    attribute_paths=attribute_paths,
    return_query=False,
    include_geometry=False,
)
display(pcp_df.head())

### Link precipitation to the streamflow plots

In [None]:
# plot-friendly column names: primary = observed, secondary = forecast
pcp_df = pcp_df.rename(
    columns={'primary_value': 'obs_pcp', 'secondary_value': 'fcst_pcp'}
)

# observed precipitation trace
pcp_obs = pcp_df.hvplot(
    x='value_time',
    y='obs_pcp',
    label='observed',
    ylabel='Precip (mm)',
    color='black',
    line_width=3,
)

# add one forecast trace per reference time, reusing ref_times, cmap and cstep from
# the streamflow plot so each forecast gets the same color in both panels
hyetograph = pcp_obs
for idx, ref_time in enumerate(ref_times):
    issued_at_ref_time = pcp_df['reference_time'] == ref_time
    trace_color = cmap[cstep * idx]
    hyetograph = hyetograph * pcp_df[issued_at_ref_time].hvplot(
        x='value_time', y='fcst_pcp', color=[trace_color]
    )

# stack precipitation above streamflow in a single column
precip_panel = hyetograph * pcp_obs
flow_panel = hydrographs * flow_obs
(precip_panel + flow_panel).cols(1)