## TEEHR Post-Event Example 1
### Explore Forecast Evaluation Trends and Relationships

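This notebook explores how forecast peak-flow errors relate to the attributes of each location. After you select a forecast scenario (medium range or short range), the value-time and reference-time ranges, a HUC2 region, and the metrics of interest, it queries TEEHR for metrics grouped by location and reference time, joins the results with location attributes (upstream area, ecoregion, stream order), and builds an interactive dashboard with linked scatter, map, and histogram views.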





### Install and Import packages

In [None]:
%%capture
!pip install 'teehr @ git+https://@github.com/RTIInternational/teehr@main'

In [None]:
import teehr.queries.duckdb as tqd

import postevent_dashboard_utils as du
import importlib
from datetime import datetime, timedelta
from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import holoviews as hv
from holoviews.element import tiles
import geoviews as gv
import panel as pn
import hvplot
import hvplot.pandas
import colorcet as cc

# Load the Bokeh plotting backend for HoloViews; logo=False turns off the logo display.
hv.extension("bokeh", logo=False)

In [None]:
# Root directory of the evaluation study; every input path below is built from it.
STUDY_DIR = Path("/home/jovyan/shared/rti-eval/post-event-example")

# Unit system to show in the visualizations: 'english' or 'metric'.
viz_units = "metric"

# Evaluation scenario definitions: each dict names one scenario/variable pair
# and the primary, secondary, crosswalk and geometry parquet files it compares.

# Medium range streamflow forecast evaluation files.
MRF_streamflow = {
    "scenario_name": "medium_range",
    "variable": "streamflow",
    "primary_filepath": STUDY_DIR / "timeseries" / "usgs" / "*.parquet",
    "secondary_filepath": STUDY_DIR / "timeseries" / "medium_range_mem1" / "*.parquet",
    "crosswalk_filepath": STUDY_DIR / "geo" / "usgs_nwm22_crosswalk.parquet",
    "geometry_filepath": STUDY_DIR / "geo" / "usgs_geometry.parquet",
}

# Medium range precipitation forecast evaluation files.
# The primary and secondary locations are both HUC10, hence the HUC10-to-HUC10 crosswalk.
MRF_forcing = {
    "scenario_name": "medium_range",
    "variable": "precipitation",
    "primary_filepath": STUDY_DIR / "timeseries" / "forcing_analysis_assim" / "*.parquet",
    "secondary_filepath": STUDY_DIR / "timeseries" / "forcing_medium_range" / "*.parquet",
    "crosswalk_filepath": STUDY_DIR / "geo" / "huc10_huc10_crosswalk.parquet",
    "geometry_filepath": STUDY_DIR / "geo" / "huc10_geometry.parquet",
}

# Short range streamflow forecast evaluation files.
# Primary, crosswalk and geometry files are reused from the medium range streamflow scenario.
SRF_streamflow = {
    "scenario_name": "short_range",
    "variable": "streamflow",
    "primary_filepath": MRF_streamflow["primary_filepath"],
    "secondary_filepath": STUDY_DIR / "timeseries" / "short_range" / "*.parquet",
    "crosswalk_filepath": MRF_streamflow["crosswalk_filepath"],
    "geometry_filepath": MRF_streamflow["geometry_filepath"],
}

# Short range precipitation forecast evaluation files.
# Primary, crosswalk and geometry files are reused from the medium range precipitation scenario.
SRF_forcing = {
    "scenario_name": "short_range",
    "variable": "precipitation",
    "primary_filepath": MRF_forcing["primary_filepath"],
    "secondary_filepath": STUDY_DIR / "timeseries" / "forcing_short_range" / "*.parquet",
    "crosswalk_filepath": MRF_forcing["crosswalk_filepath"],
    "geometry_filepath": MRF_forcing["geometry_filepath"],
}

# All scenarios available to the selector and lookup helpers used later in the notebook.
scenario_definitions = [MRF_streamflow, MRF_forcing, SRF_streamflow, SRF_forcing]

# Attribute parquet files under the study's "geo" directory, keyed by attribute name.
attribute_paths = {
    "usgs_upstream_area": STUDY_DIR / "geo" / "usgs_attr_upstream_area.parquet",
    "usgs_ecoregions": STUDY_DIR / "geo" / "usgs_attr_ecoregions.parquet",
    "usgs_stream_order": STUDY_DIR / "geo" / "usgs_attr_stream_order.parquet",
    "usgs_huc_crosswalk": STUDY_DIR / "geo" / "usgs_huc12_crosswalk.parquet",
}
# Pass the attribute files and the display units to the helper module.
# NOTE(review): presumably returns one table of attributes converted to viz_units — confirm in du.
attribute_df = du.combine_attributes(attribute_paths, viz_units)

### Select the forecast scenario, time periods, and region of interest

In [None]:
# Build the scenario selector from the sorted scenario names in scenario_definitions.
scenario_selector = du.get_scenario_selector(
    scenario_name_list=sorted(du.get_scenario_names(scenario_definitions)),
)
# Leave the selector as the cell's last expression so the notebook renders it.
scenario_selector

In [None]:
# Re-import the helper module so edits to it take effect without restarting the kernel.
importlib.reload(du)

# Look up the streamflow and precipitation definitions for the scenario
# currently chosen in scenario_selector (read when this cell runs).
streamflow_scenario = du.get_scenario(
    scenario_definitions,
    scenario_name=scenario_selector.value,
    variable="streamflow",
)
forcing_scenario = du.get_scenario(
    scenario_definitions,
    scenario_name=scenario_selector.value,
    variable="precipitation",
)

# Filter widgets for the streamflow scenario, returned keyed by widget name.
widgets = du.get_filter_widgets(
    scenario=streamflow_scenario,
    include_widgets=["value_time", "reference_time", "huc2", "metrics"],
)

# Lay out region/metric pickers on the left and the two time-range pickers on the right.
pn.Row(
    pn.Column(widgets["huc2"], widgets["metrics"]),
    pn.Spacer(width=50),
    pn.Column(widgets["value_time"], widgets["reference_time"]),
)

### Get the observed and forecast peak flows for the above specifications and join with some attributes

In [None]:
%%time
importlib.reload(du)

gdf = du.run_teehr_query(
    query_type="metrics",
    scenario=streamflow_scenario,
    huc_id=widgets['huc2'].value,
    value_time_start=widgets['value_time'][1].value_start,    
    value_time_end=widgets['value_time'][1].value_end,    
    reference_time_start=widgets['reference_time'][1].value_start,    
    reference_time_end=widgets['reference_time'][1].value_end,
    group_by=['primary_location_id','reference_time'],
    order_by=['primary_location_id','reference_time'], 
    include_metrics=widgets['metrics'].value,
    value_min=0,
    attribute_paths=attribute_paths,
)
display(gdf.head())
len(gdf)

# convert units, add attributes
gdf = du.convert_query_to_viz_units(gdf, viz_units, streamflow_scenario['variable'])
attribute_df = du.combine_attributes(attribute_paths, viz_units)
gdf = du.merge_attr_to_gdf(gdf, attribute_df)

# replace geometry with easting and northing to facilitate linked plots
df = gdf[[c for c in gdf.columns if c not in ['geometry', 'measurement_unit']]].copy()
# NOTE(review): taking latitude from geometry.y assumes gdf is in a geographic CRS — confirm.
df['latitude'] = gdf.geometry.y
# Reproject to Web Mercator (EPSG:3857) once and reuse the result for both coordinates.
web_mercator_geometry = gdf.to_crs("EPSG:3857").geometry
df['easting'] = web_mercator_geometry.x
df['northing'] = web_mercator_geometry.y

# Percent difference in the maximum value, relative to the primary maximum.
if all(x in df.columns for x in ['max_value_delta', 'primary_maximum']):
    df['max_perc_diff'] = df['max_value_delta'] / df['primary_maximum'] * 100
    # Division by a zero primary maximum yields +/-inf; treat both as missing.
    df['max_perc_diff'] = df['max_perc_diff'].replace([np.inf, -np.inf], np.nan)

# Timing difference of the maximum value, in whole hours (truncated toward zero).
if 'max_value_timedelta' in df.columns:
    max_time_diff_hours = df['max_value_timedelta'] / np.timedelta64(1, 'h')
    if max_time_diff_hours.notna().all():
        df['max_time_diff'] = max_time_diff_hours.astype(int)
    else:
        # astype(int) raises on missing values; keep them as NaN in a float
        # column while truncating the rest exactly as the integer cast would.
        df['max_time_diff'] = np.trunc(max_time_diff_hours)

# turn the string ecoregion into unique integers to enable histograms
# eco_df holds one row per distinct ecoregion_L2 string (column 0) plus 'num',
# the first four characters of that string parsed as a float, and is sorted by 'num'.
eco_df = pd.DataFrame(df['ecoregion_L2'].unique())
eco_df['num'] = eco_df[0].str[0:4].astype('float')
eco_df = eco_df.sort_values('num').reset_index()
eco_list = list(eco_df[0])
# Map each ecoregion string to its 1-based position in the sorted list with a
# dict lookup, rather than scanning eco_list once per row with list.index.
eco_position = {name: position for position, name in enumerate(eco_list, start=1)}
df['ecoregion_int'] = [eco_position[e] for e in df['ecoregion_L2']]

display(df.head())

### Build an interactive dashboard to explore the data

In [None]:
# Re-import the helper module so edits to it take effect without restarting the kernel.
importlib.reload(du)

# Columns of df that can drive the point/scatter colors.
color_column_options = [
    "stream_order",
    "ecoregion_int",
    "upstream_area",
    "latitude",
    "max_perc_diff",
    "max_time_diff",
]
# Selector for the color variable; its options come from the helper module,
# given the column names and the selected scenario. Defaults to the first column.
color_variable_selector = pn.widgets.Select(
    name="Color Variable",
    options=du.get_metric_selector_dict(color_column_options, scenario_selector.value),
    value=color_column_options[0],
    width_policy="fit",
)

# Selector for which quantity the scatter plot shows; defaults to the first option.
scatter_variable_options = ["Peak Flow", "Peak Time"]
scatter_variable_selector = pn.widgets.Select(
    name="Scatter Variable",
    options=scatter_variable_options,
    value=scatter_variable_options[0],
    width_policy="fit",
)

# Background tiles, with their x/y dimensions renamed to match df's easting/northing columns.
basemap = tiles.CartoLight().redim(x="easting", y="northing")

# Keep only the first row for each primary_location_id for the map points.
df_sub = df.drop_duplicates(subset=["primary_location_id"], keep="first")

# Map points: re-evaluated whenever the color selector's value changes.
points = pn.bind(
    du.get_points,
    df=df_sub,
    color_variable=color_variable_selector.param.value,
    scenario_name=scenario_selector.value,
    units=viz_units,
    opts={"width": 500, "height": 400},
)

# Scatter plot over all rows: re-evaluated whenever the scatter or color selector changes.
scatter = pn.bind(
    du.get_scatter,
    df=df,
    scatter_variable=scatter_variable_selector.param.value,
    color_variable=color_variable_selector.param.value,
    scenario_name=scenario_selector.value,
    units=viz_units,
    opts={"width": 400, "height": 400},
)
# Histograms of the numeric columns (50 bins each), each sized right after it is built.
# area_hist is built here but is not placed in the dashboard layout at the end of this cell.
area_hist = du.get_histogram(df, column="upstream_area", nbins=50)
area_hist.opts(width=300, height=200)

peak_diff_hist = du.get_histogram(df, column="max_perc_diff", nbins=50)
peak_diff_hist.opts(width=300, height=200)

peak_timediff_hist = du.get_histogram(df, column="max_time_diff", nbins=50)
peak_timediff_hist.opts(width=300, height=200)

# Categorical histograms; the ecoregion one is labelled with eco_df['num'].
eco_hist = du.get_categorical_histogram(df, column="ecoregion_int", labels=eco_df["num"])
eco_hist.opts(width=250, height=200)

order_hist = du.get_categorical_histogram(df, column="stream_order")
order_hist.opts(width=250, height=200)

# Dashboard header: a fixed subtitle followed by the text for the selected scenario.
scenario_text = du.get_scenario_text(scenario_selector.value)
subtitle = f"Example 1: Forecast Data Exploration<br> - {scenario_text}"
header = du.get_dashboard_header(subtitle)

# A single link_selections instance is applied to every plot below so that
# they share selections.
ls = hv.link_selections.instance()

# Wrap the plots in the same order as they appear in the layout:
# scatter, map points, then the histograms.
linked_scatter = ls(hv.DynamicMap(scatter))
linked_map = basemap * ls(hv.DynamicMap(points))
linked_histograms = ls(peak_diff_hist + peak_timediff_hist + order_hist + eco_hist)

# Header on top, then selectors beside the scatter and map, then the histogram row.
pn.Column(
    pn.Column(pn.Spacer(height=10), header, width=1100),
    pn.Row(
        pn.Spacer(height=20),
        pn.Column(
            pn.Spacer(height=20),
            scatter_variable_selector,
            color_variable_selector,
            width=200,
        ),
        pn.Row(linked_scatter + linked_map),
    ),
    pn.Row(linked_histograms),
)