# Post-Event Dashboard 1

This dashboard uses HoloViews functionality to link visualizations and explore the data. This example queries all reference times within a single event period and returns time series characteristics (e.g., peak and time to peak) for each individual reference time (i.e., the query is grouped by reference time). The goal of this dashboard is to facilitate qualitative exploration of trends and agreement in the time series characteristics for user-selected subsets based on attributes.

In [1]:
import teehr.queries.duckdb as tqd
import postevent_dashboard_utils as du
from datetime import datetime, timedelta
from pathlib import Path
import numpy as np
import pandas as pd
import geopandas as gpd
import holoviews as hv
from holoviews.element import tiles
import geoviews as gv
import panel as pn
import hvplot
import hvplot.pandas
import colorcet as cc
# Activate the Bokeh plotting backend for HoloViews (logo display turned off).
hv.extension('bokeh', logo=False)

# Reload the local helper module so that edits made to
# postevent_dashboard_utils.py take effect without restarting the kernel.
import importlib
importlib.reload(du)

<module 'postevent_dashboard_utils' from 'C:\\repos\\git\\post-event\\notebooks\\postevent_dashboard_utils.py'>

## Define the sets of parquet files for each forecast configuration and variable
We use dictionaries to define multiple sets of parquet files needed to evaluate operational NWM short- and medium-range streamflow and precipitation forecasts. These files define the source of verifying data (primary_filepath), data to evaluate (secondary_filepath), as well as the necessary geometry, crosswalk, and attributes.

In [3]:
# The cache root directory is read from a JSON config file that is expected
# to sit in the current working directory.
config_filename = 'post-event-config.json'
CACHE_ROOT = du.read_cache_dir(Path().absolute() / config_filename)
GEO_DIR = Path(CACHE_ROOT) / 'geo'
TS_DIR = Path(CACHE_ROOT) / 'timeseries'

# Short-range streamflow forecast evaluation files:
# primary = verifying data, secondary = data being evaluated.
SRF_streamflow = {
    "nwm_config": "short_range",
    "variable": "streamflow",
    "primary_filepath": TS_DIR / "usgs" / "*.parquet",
    "secondary_filepath": TS_DIR / "short_range" / "*.parquet",
    "crosswalk_filepath": GEO_DIR / "usgs_nwm22_crosswalk.conus.parquet",
    "geometry_filepath": GEO_DIR / "usgs_point_geometry.conus.parquet",
}

# Medium-range streamflow forecast evaluation files; the crosswalk and
# geometry files are shared with the short-range scenario.
MRF_streamflow = {
    "nwm_config": "medium_range_mem1",
    "variable": "streamflow",
    "primary_filepath": TS_DIR / "usgs" / "*.parquet",
    "secondary_filepath": TS_DIR / "medium_range_mem1" / "*.parquet",
    "crosswalk_filepath": SRF_streamflow["crosswalk_filepath"],
    "geometry_filepath": SRF_streamflow["geometry_filepath"],
}

# Location attribute tables that are joined to the query results later on.
attribute_paths = {
    "usgs_upstream_area": GEO_DIR / "usgs_attr_upstream_area.conus.parquet",
    "usgs_ecoregions": GEO_DIR / "usgs_attr_ecoregions.conus.parquet",
    "usgs_stream_order": GEO_DIR / "usgs_attr_stream_order.conus.parquet",
    "usgs_huc_crosswalk": GEO_DIR / "usgs_huc12_crosswalk.conus.parquet",
}

# Collect the scenarios in a list for the selector widget and give each one
# a display name of the form "<nwm_config>-<variable>".
scenario_definitions = [SRF_streamflow, MRF_streamflow]
for definition in scenario_definitions:
    definition['scenario_name'] = f"{definition['nwm_config']}-{definition['variable']}"

# General units ('english' or 'metric') to show in the visualizations.
viz_units = "metric"

### Select the forecast scenario and evaluation dates (reference time range)
Run the cell below to generate the selection widgets, select values, and then move on to the next cell (do not rerun this cell after making selections, because rerunning it recreates the widgets with their default values).  
Reference times will be included from 0z on the start date through the last available reference time on the end date.

In [36]:
# Pick up any edits made to the helper module since it was last loaded.
importlib.reload(du)

# Scenario selector, built from the names du.get_scenario_names returns
# for the configured scenario definitions.
scenario_names = du.get_scenario_names(scenario_definitions)
scenario_selector = du.get_scenario_selector(scenario_name_list=scenario_names)

# Both date pickers default to the current local date.
start_picker = pn.widgets.DatePicker(name='Start Date', value=datetime.now().date())
end_picker = pn.widgets.DatePicker(name='End Date', value=datetime.now().date())

huc2_selector = du.get_huc2_selector()
metric_selector = du.get_postevent_metric_selector(variable='streamflow')

# Lay the widgets out side by side: selectors on the left, dates on the right.
# The row is the last expression so that it is rendered as the cell output.
selection_column = pn.Column(
    scenario_selector,
    huc2_selector,
    metric_selector,
    pn.Spacer(height=50),
)
date_column = pn.Column(start_picker, end_picker)
pn.Row(selection_column, date_column)

### Read and validate the selections

In [46]:
# Resolve the scenario definition that matches the current widget selection.
scenario = du.get_scenario(
    scenario_definitions,
    scenario_name=scenario_selector.value,
    variable='streamflow',
)

# Translate the picked start/end dates into the date ranges used by the
# query for the selected NWM configuration.
selected_nwm_config = scenario['nwm_config']
eval_dates = du.get_nwm_dates2(
    selected_nwm_config,
    start_picker.value,
    end_picker.value,
)

5 days selected - data will be included corresponding to short_range reference times from 2023-07-06 00:00:00 through 2023-07-10 23:00:00


### Get observed and forecast timeseries characteristics and join with some attributes

In [49]:
%%time
# Query the time series characteristics for the selected scenario / HUC2 /
# date range, then derive the plain DataFrame `df` used by the dashboard:
#   1. run the TEEHR metrics query grouped by location and reference time
#   2. convert to the visualization units and merge the location attributes
#   3. swap the geometry column for latitude / Web Mercator easting, northing
#   4. derive peak-flow percent difference and peak-time difference (hours)
#   5. encode the ecoregion strings as integers so they can be histogrammed
print('Reading parquet files and calculating metrics...')
query_gdf = du.run_teehr_query(
    query_type="metrics",
    scenario=scenario,
    huc_id=huc2_selector.value,
    value_time_start=eval_dates['value_time_start'],
    value_time_end=eval_dates['value_time_end'],
    reference_time_start=eval_dates['reference_time_start'],
    reference_time_end=eval_dates['reference_time_end'],
    group_by=['primary_location_id','reference_time'],
    order_by=['primary_location_id','reference_time'],
    include_metrics=metric_selector.value,
    value_min=0,
    attribute_paths=attribute_paths,
)
display(query_gdf.head())

# convert units, add attributes
query_gdf = du.convert_query_to_viz_units(query_gdf, viz_units, scenario['variable'])
attribute_df = du.combine_attributes(attribute_paths, viz_units)
gdf = du.merge_attr_to_gdf(query_gdf, attribute_df)

# replace geometry with easting and northing to facilitate linked plots
df = gdf[[c for c in gdf.columns if c not in ['geometry','measurement_unit']]].copy()
df['latitude'] = gdf.geometry.y
# Reproject to Web Mercator once and reuse the result for both coordinates
# (the reprojection touches every row, so doing it twice is wasted work).
web_mercator_geometry = gdf.to_crs("EPSG:3857").geometry
df['easting'] = web_mercator_geometry.x
df['northing'] = web_mercator_geometry.y

# calculate the peak flow percent difference and peak time difference in hours
if {'max_value_delta', 'primary_maximum'}.issubset(df.columns):
    perc_diff = df['max_value_delta'] / df['primary_maximum'] * 100
    # Division by a zero observed peak yields +/-inf; blank out both signs so
    # they do not distort the histogram range or the color scale.
    df['max_perc_diff'] = perc_diff.replace([np.inf, -np.inf], np.nan)
if 'max_value_timedelta' in df.columns:
    # Whole hours, truncated toward zero. NOTE: astype(int) raises if any
    # timedelta is missing (NaT), so every row must have both peak times.
    df['max_time_diff'] = (df['max_value_timedelta'] / np.timedelta64(1, 'h')).astype(int)

# turn the string ecoregion into unique integers to enable histograms
# (regions are ranked 1..N by the numeric code in the first four characters
# of the ecoregion string, e.g. "5.3 ATLANTIC HIGHLANDS" -> 5.3)
eco_df = pd.DataFrame(df['ecoregion_L2'].unique())
eco_df['num'] = eco_df[0].str[0:4].astype('float')
eco_df = eco_df.sort_values('num').reset_index()
eco_list = list(eco_df[0])
# Dictionary lookup instead of list.index() per row: one pass over the rows
# rather than a scan of the region list for every row.
eco_rank = {eco_name: rank for rank, eco_name in enumerate(eco_list, start=1)}
df['ecoregion_int'] = df['ecoregion_L2'].map(eco_rank)

display(df.head())

Reading parquet files and calculating metrics...


Unnamed: 0,primary_location_id,reference_time,measurement_unit,primary_maximum,secondary_maximum,max_value_delta,primary_max_value_time,secondary_max_value_time,max_value_timedelta,geometry
0,usgs-01010000,2023-07-06 00:00:00,m3/s,25.088726,25.799999,0.711273,2023-07-06 01:00:00,2023-07-06 05:00:00,0 days 04:00:00,POINT (-69.71556 46.70056)
1,usgs-01010000,2023-07-06 01:00:00,m3/s,24.805557,25.729999,0.924442,2023-07-06 02:00:00,2023-07-06 06:00:00,0 days 04:00:00,POINT (-69.71556 46.70056)
2,usgs-01010000,2023-07-06 02:00:00,m3/s,24.805557,25.729999,0.924442,2023-07-06 03:00:00,2023-07-06 06:00:00,0 days 03:00:00,POINT (-69.71556 46.70056)
3,usgs-01010000,2023-07-06 03:00:00,m3/s,24.805557,25.619999,0.814442,2023-07-06 04:00:00,2023-07-06 08:00:00,0 days 04:00:00,POINT (-69.71556 46.70056)
4,usgs-01010000,2023-07-06 04:00:00,m3/s,24.522388,25.569999,1.047611,2023-07-06 05:00:00,2023-07-06 09:00:00,0 days 04:00:00,POINT (-69.71556 46.70056)


Unnamed: 0,primary_location_id,reference_time,primary_maximum,secondary_maximum,max_value_delta,primary_max_value_time,secondary_max_value_time,max_value_timedelta,upstream_area,ecoregion_L2,stream_order,latitude,easting,northing,max_perc_diff,max_time_diff,ecoregion_int
0,usgs-01010000,2023-07-06 00:00:00,25.088726,25.799999,0.711273,2023-07-06 01:00:00,2023-07-06 05:00:00,0 days 04:00:00,3516.43786,5.3 ATLANTIC HIGHLANDS,5,46.700556,-7760700.0,5893333.0,2.835032,4,1
1,usgs-01010000,2023-07-06 01:00:00,24.805557,25.729999,0.924442,2023-07-06 02:00:00,2023-07-06 06:00:00,0 days 04:00:00,3516.43786,5.3 ATLANTIC HIGHLANDS,5,46.700556,-7760700.0,5893333.0,3.726754,4,1
2,usgs-01010000,2023-07-06 02:00:00,24.805557,25.729999,0.924442,2023-07-06 03:00:00,2023-07-06 06:00:00,0 days 03:00:00,3516.43786,5.3 ATLANTIC HIGHLANDS,5,46.700556,-7760700.0,5893333.0,3.726754,3,1
3,usgs-01010000,2023-07-06 03:00:00,24.805557,25.619999,0.814442,2023-07-06 04:00:00,2023-07-06 08:00:00,0 days 04:00:00,3516.43786,5.3 ATLANTIC HIGHLANDS,5,46.700556,-7760700.0,5893333.0,3.283305,4,1
4,usgs-01010000,2023-07-06 04:00:00,24.522388,25.569999,1.047611,2023-07-06 05:00:00,2023-07-06 09:00:00,0 days 04:00:00,3516.43786,5.3 ATLANTIC HIGHLANDS,5,46.700556,-7760700.0,5893333.0,4.272059,4,1


CPU times: total: 7.81 s
Wall time: 2.01 s


### Build an interactive dashboard to explore the data

In [59]:
# Pick up any edits made to the helper module since it was last loaded.
importlib.reload(du)

# --- Selector widgets -------------------------------------------------------
color_column_options = ['stream_order','ecoregion_int','upstream_area','latitude','max_perc_diff','max_time_diff']
color_variable_selector = pn.widgets.Select(
    name='Color Variable',
    options=du.get_metric_selector_dict(color_column_options,scenario_selector.value),
    value=color_column_options[0],
    width=180,
)

scatter_variable_options = ['Peak Flow','Peak Time']
scatter_variable_selector = pn.widgets.Select(
    name='Scatter Variable',
    options=scatter_variable_options,
    value=scatter_variable_options[0],
    width=180,
)

# --- Map and scatter plots --------------------------------------------------
# Tile basemap with its axes renamed to match the easting/northing columns.
basemap = tiles.CartoLight().redim(x='easting', y='northing')

# One row per location (the first one encountered) for the map points.
df_sub = df.drop_duplicates(subset=['primary_location_id'], keep='first')

# Bind the plotting helpers to the widgets so the plots are rebuilt whenever
# a selector value changes. The scenario name and units are passed as plain
# values, i.e. they are fixed at the time this cell runs.
points = pn.bind(
    du.get_points,
    df=df_sub,
    color_variable=color_variable_selector.param.value,
    scenario_name=scenario_selector.value,
    units=viz_units,
    opts=dict(width=500, height=400),
)
scatter = pn.bind(
    du.get_scatter,
    df=df,
    scatter_variable=scatter_variable_selector.param.value,
    color_variable=color_variable_selector.param.value,
    scenario_name=scenario_selector.value,
    units=viz_units,
    opts=dict(width=400, height=400),
)

# --- Histograms -------------------------------------------------------------
# area_hist is built and sized here but is not placed in the layout below.
area_hist = du.get_histogram(df, column='upstream_area', nbins=50)
peak_diff_hist = du.get_histogram(df, column='max_perc_diff', nbins=50)
peak_timediff_hist = du.get_histogram(df, column='max_time_diff', nbins=50)
eco_hist = du.get_categorical_histogram(df, column='ecoregion_int', labels=eco_df['num'])
order_hist = du.get_categorical_histogram(df, column='stream_order')

# Apply the plot sizes: numeric histograms are wider than categorical ones.
for numeric_hist in (area_hist, peak_diff_hist, peak_timediff_hist):
    numeric_hist.opts(width=300, height=200)
for categorical_hist in (eco_hist, order_hist):
    categorical_hist.opts(width=250, height=200)

# --- Header -----------------------------------------------------------------
scenario_text = du.get_scenario_text(scenario_selector.value)
subtitle = f"Example 1: Forecast Data Exploration<br> - {scenario_text}"
header = du.get_dashboard_header(subtitle)

# --- Linked selections ------------------------------------------------------
# A single link_selections instance is applied to every plot so that a
# selection made in one of them is reflected in the others.
ls = hv.link_selections.instance()
linked_scatter = ls(hv.DynamicMap(scatter))
linked_map = basemap * ls(hv.DynamicMap(points))
linked_histograms = ls(peak_diff_hist + peak_timediff_hist + order_hist + eco_hist)

# --- Layout (last expression, so it is rendered as the cell output) ---------
header_block = pn.Column(pn.Spacer(height=10), header, width=1100)
selector_block = pn.Column(
    pn.Spacer(height=20),
    scatter_variable_selector,
    color_variable_selector,
    width=220,
)
pn.Column(
    header_block,
    pn.Row(
        pn.Spacer(height=20),
        selector_block,
        pn.Row(linked_scatter + linked_map),
    ),
    pn.Row(linked_histograms),
)

### Known issue: the color map only works for stream order - for every other color variable the color bar does not align with the plotted colors/values

In [55]:
# Debugging aid: show the values of the currently selected color variable
# in the one-row-per-location subset that feeds the map points.
df_sub[color_variable_selector.value]

0        46.700556
120      46.893889
240      47.113056
360      47.069722
480      47.206979
           ...    
35888    41.314089
36008    41.163767
36128    41.174322
36248    41.066111
36368    41.027297
Name: latitude, Length: 305, dtype: float64

In [57]:
# Debugging aid: display the de-duplicated frame (first row kept per
# primary_location_id) that is passed to the map points plot.
df_sub

Unnamed: 0,primary_location_id,reference_time,primary_maximum,secondary_maximum,max_value_delta,primary_max_value_time,secondary_max_value_time,max_value_timedelta,upstream_area,ecoregion_L2,stream_order,latitude,easting,northing,max_perc_diff,max_time_diff,ecoregion_int
0,usgs-01010000,2023-07-06,25.088726,25.799999,0.711273,2023-07-06 01:00:00,2023-07-06 05:00:00,0 days 04:00:00,3516.437860,5.3 ATLANTIC HIGHLANDS,5,46.700556,-7.760700e+06,5.893333e+06,2.835032,4,1
120,usgs-01010070,2023-07-06,2.120932,2.160000,0.039068,2023-07-06 01:00:00,2023-07-06 01:00:00,0 days 00:00:00,505.346065,5.3 ATLANTIC HIGHLANDS,5,46.893889,-7.764720e+06,5.924771e+06,1.842025,0,1
240,usgs-01010500,2023-07-06,56.067356,55.759999,-0.307357,2023-07-06 01:00:00,2023-07-06 01:00:00,0 days 00:00:00,7038.501787,5.3 ATLANTIC HIGHLANDS,5,47.113056,-7.690847e+06,5.960547e+06,-0.548193,0,1
360,usgs-01011000,2023-07-06,49.554482,49.539999,-0.014483,2023-07-06 01:00:00,2023-07-06 01:00:00,0 days 00:00:00,3190.470360,5.3 ATLANTIC HIGHLANDS,5,47.069722,-7.689889e+06,5.953462e+06,-0.029226,0,1
480,usgs-01011500,2023-07-06,27.410707,6.350000,-21.060708,2023-07-06 01:00:00,2023-07-06 01:00:00,0 days 00:00:00,1357.351126,8.1 MIXED WOOD PLAINS,5,47.206979,-7.676194e+06,5.975924e+06,-76.833871,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35888,usgs-012095493,2023-07-06,0.365287,0.350000,-0.015287,2023-07-06 01:00:00,2023-07-06 01:00:00,0 days 00:00:00,7.934090,8.1 MIXED WOOD PLAINS,1,41.314089,-8.181192e+06,5.058781e+06,-4.185010,0,2
36008,usgs-01209700,2023-07-06,1.288417,1.190000,-0.098417,2023-07-06 01:00:00,2023-07-06 01:00:00,0 days 00:00:00,73.530010,8.1 MIXED WOOD PLAINS,4,41.163767,-8.173026e+06,5.036527e+06,-7.638565,0,2
36128,usgs-01209761,2023-07-06,0.028883,0.030000,0.001117,2023-07-06 01:00:00,2023-07-06 01:00:00,0 days 00:00:00,2.639180,8.1 MIXED WOOD PLAINS,2,41.174322,-8.183216e+06,5.038088e+06,3.866665,0,2
36248,usgs-01209901,2023-07-06,0.651287,0.660000,0.008713,2023-07-06 01:00:00,2023-07-06 08:00:00,0 days 07:00:00,78.742400,8.1 MIXED WOOD PLAINS,4,41.066111,-8.187468e+06,5.022098e+06,1.337742,7,2


In [58]:
# Debugging aid: list the distinct integer ecoregion codes present in the subset.
df_sub['ecoregion_int'].unique()

array([1, 2, 3], dtype=int64)