## TEEHR Example 2 - Explore Forecast Data from a Recent Flood Event

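This notebook uses TEEHR to explore medium-range and short-range streamflow and precipitation forecasts against observations (USGS gage data and analysis/assimilation forcing) for a recent flood event.

**TODO:** expand this description of the use case.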





### Install and Import packages

In [None]:
%%capture
!pip install spatialpandas colormap colorcet duckdb
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@main'
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@39d6627e4f49b0bdeab3a4c4e8837e6ce5a15f78'

In [None]:
!pip install 'teehr @ git+https://4@github.com/RTIInternational/teehr@main'

In [None]:
import teehr.queries.duckdb as tqd
import teehr.queries.utils as tqu
import dashboard_utils as du

import importlib
from datetime import timedelta
from pathlib import Path
import geopandas as gpd
import pandas as pd
import spatialpandas as spd
import numpy as np
import pathlib
from typing import List
import duckdb as ddb

import hvplot
import hvplot.pandas
import holoviews as hv
from holoviews.element import tiles
import geoviews as gv
import panel as pn
import colorcet as cc

from holoviews.operation.datashader import rasterize, spread
hv.extension('bokeh', logo=False)

In [None]:
# Evaluation study directory: root folder holding the timeseries and geo parquet files.
STUDY_DIR = Path("/home", "jovyan", "shared", "rti-eval", "post-event-example")

# General units (english or metric) to show in the visualizations.
viz_units = "metric"

# Evaluation scenario definitions - specific variables and configurations to be
# compared within the overall study. Every scenario is a dict with the same keys:
#   scenario_name, variable, primary_filepath, secondary_filepath,
#   crosswalk_filepath, geometry_filepath

# Medium range streamflow forecast evaluation files.
MRF_streamflow = dict(
    scenario_name="medium_range",
    variable="streamflow",
    primary_filepath=Path(STUDY_DIR, "timeseries", "usgs", "*.parquet"),
    secondary_filepath=Path(STUDY_DIR, "timeseries", "medium_range_mem1", "*.parquet"),
    crosswalk_filepath=Path(STUDY_DIR, "geo", "usgs_nwm22_crosswalk.parquet"),
    geometry_filepath=Path(STUDY_DIR, "geo", "usgs_geometry.parquet"),
)

# Medium range precip forecast evaluation files.
MRF_forcing = dict(
    scenario_name="medium_range",
    variable="precipitation",
    primary_filepath=Path(STUDY_DIR, "timeseries", "forcing_analysis_assim", "*.parquet"),
    secondary_filepath=Path(STUDY_DIR, "timeseries", "forcing_medium_range", "*.parquet"),
    # the primary and secondary are both HUC10
    crosswalk_filepath=Path(STUDY_DIR, "geo", "huc10_huc10_crosswalk.parquet"),
    geometry_filepath=Path(STUDY_DIR, "geo", "huc10_geometry.parquet"),
)

# Short range streamflow forecast evaluation files.
# Shares observations, crosswalk and geometry with the medium range streamflow scenario.
SRF_streamflow = dict(
    scenario_name="short_range",
    variable="streamflow",
    primary_filepath=MRF_streamflow["primary_filepath"],
    secondary_filepath=Path(STUDY_DIR, "timeseries", "short_range", "*.parquet"),
    crosswalk_filepath=MRF_streamflow["crosswalk_filepath"],
    geometry_filepath=MRF_streamflow["geometry_filepath"],
)

# Short range precip forecast evaluation files.
# The primary data is the same HUC10 forcing used by MRF_forcing, so the crosswalk and
# geometry must also be the HUC10 ones (previously the USGS/NWM streamflow files were
# referenced here, which do not match the precipitation location ids).
SRF_forcing = dict(
    scenario_name="short_range",
    variable="precipitation",
    primary_filepath=MRF_forcing["primary_filepath"],
    secondary_filepath=Path(STUDY_DIR, "timeseries", "forcing_short_range", "*.parquet"),
    crosswalk_filepath=MRF_forcing["crosswalk_filepath"],
    geometry_filepath=MRF_forcing["geometry_filepath"],
)

# All scenarios available to the selector widgets.
eval_scenarios = [MRF_streamflow, MRF_forcing, SRF_streamflow, SRF_forcing]

# Parquet files holding per-location attributes, keyed by a short label.
attribute_paths = {
    "usgs_upstream_area": Path(STUDY_DIR, "geo", "usgs_attr_upstream_area.parquet"),
    "usgs_ecoregions": Path(STUDY_DIR, "geo", "usgs_attr_ecoregions.parquet"),
    "usgs_stream_order": Path(STUDY_DIR, "geo", "usgs_attr_stream_order.parquet"),
    "usgs_huc_crosswalk": Path(STUDY_DIR, "geo", "usgs_huc12_crosswalk.parquet"),
    # Disabled entries:
    #   nwm22_huc_crosswalk -> geo/nwm22_huc12_crosswalk.parquet
    #   UPSTREAM_IMPERVIOUS -> geo/usgs_attr_upstream_imperv.parquet (don't have this data yet)
}

# Combine the attribute files into one table via the dashboard helper, passing the display units.
attribute_df = du.combine_attributes(attribute_paths, viz_units)

## Select the scenario and date ranges before launching the dashboard

Next we will check the dates available in the parquet files, and use a slider to select all or a portion of the total available period to evaluate.
(ToDo: create utility to check that data are complete for all of the above defined timeseries files between the min/max dates).

In [None]:
importlib.reload(du)  # pick up edits to dashboard_utils without restarting the kernel

# Scenario selector populated with the sorted scenario names from the configuration cell.
scenario_selector = du.get_scenario_selector(
    scenario_name_list=sorted(du.get_scenario_names(eval_scenarios))
)

# The name `scenarios` is not defined anywhere in this notebook; the scenario list built
# in the configuration cell is `eval_scenarios`.
value_time_slider = du.get_value_time_slider(eval_scenarios)

# Display the two widgets side by side (last expression of the cell is rendered).
pn.Column(
    pn.Spacer(height=10),
    pn.Row(pn.panel(scenario_selector, width=80), pn.Spacer(width=20), value_time_slider),
)

## Explore forecasts, one at a time
For an initial example, we will visualize one reference time (forecast issue time) at a time.

In [None]:
importlib.reload(du)  # pick up edits to dashboard_utils without restarting the kernel

# Scenarios driving this dashboard (both defined in the configuration cell).
streamflow_scenario = MRF_streamflow
precip_scenario = MRF_forcing

######### Build components for the dashboard

huc2_selector = du.get_huc2_selector()

# Reference time player (eventually replace with individual arrows).
# The start of the window is one hour before the selected value-time start.
# NOTE(review): value_time_slider[1] is assumed to be the range-slider widget inside the
# object returned by du.get_value_time_slider - confirm against dashboard_utils.
start_date = value_time_slider[1].value_start - timedelta(hours=1)
end_date = value_time_slider[1].value_end
# `scenarios` is not defined in this notebook; pass the scenario list from the configuration cell.
# NOTE(review): confirm the helper accepts a list for `scenario`; otherwise pass streamflow_scenario.
reference_time_player = du.get_reference_time_player_selected_dates(
    scenario=eval_scenarios, start=start_date, end=end_date
)
reftime_player_header = pn.pane.HTML(
    "Use the slider or forward arrow (with line) to select a reference time:",
    style={'font-size': '16px', 'font-weight': 'bold'},
)
# Text pane that re-renders whenever the player value changes.
current_ref_time = pn.bind(du.get_reference_time_text, reference_time=reference_time_player.param.value)

# Build background (static) map Elements - background tiles and all gage points
# for reference on rasterized catchments DynamicMap.
tiles_background = gv.tile_sources.CartoLight  # alternative: OSM
points_background = du.get_all_points(streamflow_scenario)

# Bind the points query to the widgets so the map refreshes when the time window
# or the reference time changes.
points_bind = pn.bind(
    du.build_hv_points_from_query,
    scenario=streamflow_scenario,
    value_time_start=value_time_slider[1].param.value_start,
    value_time_end=value_time_slider[1].param.value_end,
    reference_time_single=reference_time_player.param.value,
    value_min=0,
    group_by=['primary_location_id', 'reference_time'],
    include_metrics=['primary_maximum', 'secondary_maximum', 'max_value_delta'],
    metric_limits=dict(primary_maximum=(0.1, 10e6)),
    attribute_paths=attribute_paths,
    units=viz_units,
)
points_dmap = hv.DynamicMap(points_bind)

# Define stream source as points selection from points_dmap (first point selected initially).
point_selection = hv.streams.Selection1D(source=points_dmap, index=[0])

######### Common plotting options

map_opts = dict(show_grid=False, show_legend=False, xaxis=None, yaxis=None, width=600, height=400)
points_cmap_opts = dict(cmap=cc.CET_L8[::-1], cnorm='eq_hist', colorbar=True)
curve_opts = dict(toolbar=None, tools=["hover"], show_title=False, width=1050)

###### Apply style options that vary by element

aggregator = pn.bind(du.get_aggregator, "primary_sum")
tiles_background.opts(**map_opts)
points_background.opts(**map_opts, color='lightgray', size=2, toolbar='right')
# NOTE(review): the points are colored by 'max_perc_diff', which is not one of the names listed
# in include_metrics above - confirm the query helper adds this column.
points_dmap.opts(**map_opts, tools=['hover', 'tap'], color=hv.dim('max_perc_diff'),
                 cmap=cc.CET_D1A[::-1], cnorm='linear', clim=(-100, 100), colorbar=True,
                 size=5, toolbar='above', title="Peak Error (%)",
                 selection_line_width=5, nonselection_line_width=0, nonselection_alpha=0.1)

###### Panel header

# NOTE(review): the header text says "Example 1" while the notebook title says "Example 2" - confirm.
header = pn.Row(
            pn.pane.PNG('https://ciroh.ua.edu/wp-content/uploads/2022/08/CIROHLogo_200x200.png', width=60),
            pn.pane.Markdown(
                """
                ## CIROH Tools for Exploratory Evaluation in Hydrology Research (TEEHR):  Example 1 - Forecast Data Exploration
                """,
                width_policy="max", sizing_mode="stretch_width"
            )
)

# Build the Panel layout: header, reference-time controls, then the map.
layout = pn.Column(
    pn.Spacer(height=10), header, reftime_player_header,
    pn.Row(
        pn.Column(current_ref_time, reference_time_player),
        pn.Spacer(width=50), huc2_selector),
    pn.Row(tiles_background*points_dmap),
)

# Launch the layout.
layout.servable()



In [None]:
importlib.reload(du)  # pick up edits to dashboard_utils without restarting the kernel

# Precipitation time-series plot for the gage currently selected on the map.
# The tap-selection stream created with the map is named `point_selection`;
# `selection_stream` is not defined anywhere in this notebook.
precip_plot_hv = du.build_hv_precip_tsplot_from_query_selected_point(
    index=point_selection.index,
    points_dmap=points_dmap,
    scenario=MRF_forcing,
    reference_time_single=reference_time_player.value,
    value_min=0,
    attribute_paths=attribute_paths,
    units=viz_units,
)

In [None]:
# Indices of the point(s) currently selected on the map.
# (A trailing comma here previously wrapped the value in a 1-tuple, so the emptiness
# check below was always true and an empty selection failed later with an IndexError.)
index = point_selection.index
if len(index) > 0 and len(points_dmap.dimensions('value')) > 0:
    # Gage id of the first selected point.
    point_id = points_dmap.dimension_values('primary_location_id')[index][0]
    # Map the gage to its HUC12 via the crosswalk, then truncate the code to HUC10.
    cross = pd.read_parquet(attribute_paths['usgs_huc_crosswalk'])
    huc12_id = cross.loc[cross['primary_location_id']==point_id, 'secondary_location_id'].iloc[0]
    # presumably ids look like "<prefix>-<digits>"; the first 10 digits are kept - verify against the crosswalk
    huc10_id = "-".join(['huc10', huc12_id.split("-")[1][:10]])
    title = f"{huc10_id} (Contains Gage: {point_id})"

In [None]:
# Reload the helper module so that the latest dashboard_utils code is used.
importlib.reload(du)

# Timeseries query for the HUC10 that contains the selected gage, restricted to the
# reference time currently shown by the player; geometry is not requested.
df = du.run_teehr_query(
    query_type="timeseries",
    scenario=MRF_forcing,
    location_id=huc10_id,
    reference_time_single=reference_time_player.value,
    value_min=0,
    order_by=[
        'primary_location_id',
        'reference_time',
        'value_time',
    ],
    attribute_paths=attribute_paths,
    include_geometry=False,
)

In [None]:
# Preview the query result (first rows only) instead of rendering the whole frame.
df.head()

In [None]:
# Shared settings for the precipitation time-series plot.
# Use the notebook-wide unit setting so the axis labels agree with the rest of the dashboard.
units = viz_units
opts = dict(curve_opts, xaxis=None, height=150)

# String version of the value time, used as the categorical x dimension of the bars/curve.
# NOTE: this adds a column to `df` in place.
df['value_time_str'] = df['value_time'].dt.strftime('%Y-%m-%d-%H')
time_start = df['value_time'].min()
time_end = df['value_time'].max()

# x position of the title text: 1% into the time range, truncated to the whole hour
# so that it matches one of the 'value_time_str' categories.
t = time_start + (time_end - time_start)*0.01
text_x = t.replace(second=0, microsecond=0, minute=0).strftime('%Y-%m-%d-%H')

# Axis label suffixes for rate and cumulative precipitation.
if units == 'metric':
    unit_rate_label = 'mm/hr'
    unit_cum_label = 'mm'
else:
    unit_rate_label = 'in/hr'
    unit_cum_label = 'in'
    

In [None]:
importlib.reload(du)  # pick up edits to dashboard_utils without restarting the kernel

if 'value' in df.columns:  # single timeseries
    df['cumulative'] = df['value'].cumsum()

    # In this branch the data lives in 'value'; 'primary_value' is only used in the paired case below.
    data_max = df['value'].max()
    ymax_bars = max(data_max*1.1, 1)
    ymax_curve = max(data_max*1.1, 1)
    text_y = ymax_bars*0.9

    bars = hv.Bars(df, kdims=[('value_time_str', 'Date')], vdims=[('value', 'Precip Rate ' + unit_rate_label)])
    # The cumulative column created above is named 'cumulative'.
    curve = hv.Curve(df, kdims=[('value_time_str', 'Date')], vdims=[('cumulative', 'Precip ' + unit_cum_label)])

    bars.opts(**opts, fill_color='blue', line_color=None, ylim=(0, ymax_bars))
    curve.opts(**opts, color='orange', hooks=[du.plot_secondary_bars_curve])

    # Elements shown in the final overlay for this branch.
    ts_elements = bars * curve

else:  # paired primary/secondary timeseries

    df['primary_cumulative'] = df['primary_value'].cumsum()
    df['secondary_cumulative'] = df['secondary_value'].cumsum()
    data_max = max(df['primary_value'].max(), df['secondary_value'].max())
    ymax_bars = max(data_max*1.1, 1)
    ymax_curve = max(data_max*1.1, 1)
    text_y = ymax_bars*0.9

    bars_prim = hv.Bars(df, kdims=[('value_time_str', 'Date')], vdims=[('primary_value', 'Precip Rate ' + unit_rate_label)])
    curve_prim = hv.Curve(df, kdims=[('value_time_str', 'Date')], vdims=[('primary_cumulative', 'Precip ' + unit_cum_label)])
    bars_sec = hv.Bars(df, kdims=[('value_time_str', 'Date')], vdims=[('secondary_value', 'Precip Rate ' + unit_rate_label)])
    curve_sec = hv.Curve(df, kdims=[('value_time_str', 'Date')], vdims=[('secondary_cumulative', 'Precip ' + unit_cum_label)])

    # NOTE(review): primary and secondary elements currently share the same colors - confirm intent.
    bars_prim.opts(**opts, fill_color='blue', line_color=None, ylim=(0, ymax_bars))
    curve_prim.opts(**opts, color='orange', hooks=[du.plot_secondary_bars_curve])
    bars_sec.opts(**opts, fill_color='blue', line_color=None, ylim=(0, ymax_bars))
    curve_sec.opts(**opts, color='orange', hooks=[du.plot_secondary_bars_curve])

    # Only the primary pair is shown in the final overlay.
    ts_elements = bars_prim * curve_prim

# Title text placed near the top-left of the plot.
text = hv.Text(text_x, text_y, title).opts(text_align='left', text_font_size='10pt',
                                           text_color='#57504d', text_font_style='bold')

# Overlay the branch-specific elements with the title; works for both branches above.
ts_layout_hv = (ts_elements * text).opts(show_title=False)
ts_layout_hv

In [None]:
# Scratch check: rebuild the primary bars/curve pair directly from df (same dimensions
# as bars_prim/curve_prim) and overlay them without the title text.
bars_test2 = hv.Bars(df, kdims = [('value_time_str','Date')], vdims = [('primary_value', 'Precip Rate ' + unit_rate_label)])
curve_test2 = hv.Curve(df, kdims = [('value_time_str', 'Date')], vdims = [('primary_cumulative', 'Precip ' + unit_cum_label)])

bars_test2.opts(**opts, fill_color = 'blue', line_color = None, ylim=(0, ymax_bars))
curve_test2.opts(**opts, color='orange', hooks=[du.plot_secondary_bars_curve])  

bars_test2 * curve_test2

In [None]:
# Scratch check: cumulative primary precipitation as a plain curve, without the secondary-axis hook.
curve_test = hv.Curve(df, kdims = [('value_time_str', 'Date')], vdims = [('primary_cumulative', 'Precip ' + unit_cum_label)])
curve_test.opts(color='orange')

In [None]:
# Same curve, now with the secondary-axis hook from dashboard_utils attached.
curve_test.opts(color='orange', hooks=[du.plot_secondary_bars_curve])

In [None]:
# Re-apply styling to the primary bars and curve; the curve is styled here without the hooks argument.
# Requires bars_prim/curve_prim from the paired-timeseries branch of the plotting cell.
bars_prim.opts(**opts, fill_color = 'blue', line_color = None, ylim=(0, ymax_bars))
curve_prim.opts(**opts, color='orange')

In [None]:
# Inspect the cumulative primary precipitation values in ascending order.
df['primary_cumulative'].sort_values()

In [None]:
def plot_secondary_bars_curve(plot, element):
    """
    Hook to plot data on a secondary (twin) axis on a Holoviews Plot with Bokeh backend.

    The first renderer of the figure is read as the bar glyph (its ``glyph.top`` column)
    and stays on the left axis; the last renderer is read as the line glyph (its
    ``glyph.y`` column) and is moved to a right-hand axis named ``"twiny"``. Both axes
    start at 0 and end at ``max(data max, 1)`` plus 10% of the data range.

    Parameters
    ----------
    plot
        Plot object passed by HoloViews; its ``state`` attribute is used as the figure.
    element
        Element being rendered (unused, required by the hook signature).

    Returns
    -------
    None
        The figure is modified in place.

    More info:
    - http://holoviews.org/user_guide/Customizing_Plots.html#plot-hooks
    - https://docs.bokeh.org/en/latest/docs/user_guide/plotting.html#twin-axes
    """
    fig = plot.state
    glyph_first = fig.renderers[0]   # will be the original plot
    glyph_last = fig.renderers[-1]   # will be the new plot
    right_axis_name = "twiny"

    # Create both axes if right axis does not exist
    if right_axis_name not in fig.extra_y_ranges:
        # Imported here because this notebook never imports these Bokeh models at the top;
        # without the import, building the ranges below raised NameError.
        from bokeh.models import LinearAxis, Range1d

        # Recreate primary axis (left)
        y_first_name = glyph_first.glyph.top
        y_first_values = glyph_first.data_source.data[y_first_name]
        y_first_max = y_first_values.max()
        y_first_offset = (y_first_max - y_first_values.min()) * 0.1
        fig.y_range = Range1d(
            start=0,
            end=max(y_first_max, 1) + y_first_offset
        )
        fig.y_range.name = glyph_first.y_range_name

        # Create secondary axis (right)
        y_last_name = glyph_last.glyph.y
        y_last_values = glyph_last.data_source.data[y_last_name]
        y_last_max = y_last_values.max()
        y_last_offset = (y_last_max - y_last_values.min()) * 0.1
        fig.extra_y_ranges = {right_axis_name: Range1d(
            start=0,
            end=max(y_last_max, 1) + y_last_offset
        )}
        fig.add_layout(LinearAxis(y_range_name=right_axis_name, axis_label=glyph_last.glyph.y), "right")

    # Set right axis for the last glyph added to the figure
    glyph_last.y_range_name = right_axis_name

In [None]:
# Rebuild and display the overlay of primary bars, primary cumulative curve and title text.
# Requires bars_prim/curve_prim from the paired-timeseries branch of the plotting cell.
ts_layout_hv = (bars_prim * curve_prim * text).opts(show_title=False)
ts_layout_hv

In [None]:
# List the dimensions available on the points DynamicMap.
points_dmap.dimensions()

### Create linked visualizations using holoviews
First, a simple map showing the percent difference in peak flow across the country for this single forecast.

In [None]:
# Column plotted on the map, the scatter and the histogram.
measure = 'perc_diff'
width = 700

# OpenStreetMap background tiles (gv.tile_sources.CartoLight was the alternative considered).
basemap = osm2 = tiles.OSM()

# Map of locations colored by the selected measure, with peak values and gage id on hover.
# NOTE(review): this expects `df` to hold 'easting'/'northing' and peak-metric columns,
# which the timeseries query result assigned to `df` earlier may not provide - confirm.
points_hv = hv.Points(
    df,
    kdims=['easting', 'northing'],
    vdims=[
        measure,
        ('secondary_maximum', 'fcst_peak'),
        ('primary_maximum', 'obs_peak'),
        ('primary_location_id', 'gage_id'),
    ],
)
points_hv.opts(
    width=width,
    height=400,
    color=hv.dim(measure),
    clim=(-100, 100),
    cmap=cc.CET_D1A[::-1],
    size=5,
    xaxis=None,
    yaxis=None,
    colorbar=True,
    tools=['hover'],
)

# Histogram of the measure and forecast-vs-observed peak scatter.
diff_hist = df.hvplot.hist(
    y=measure,
    width=width,
    bins=100,
    bin_range=(-100, 1000),
    height=200,
    xlabel='% Difference Peak Flow',
)
diff_scat = hv.Scatter(
    df,
    kdims=['secondary_maximum'],
    vdims=['primary_maximum', 'easting', 'northing', measure],
)
diff_scat.opts(alpha=0.2, width=400, height=400, xlabel='Forecast Peak', ylabel='Observed Peak')

# Link selections across the three views and lay them out in two columns.
ls = hv.link_selections.instance()
ls((basemap*points_hv + diff_scat + diff_hist)).cols(2)

In [None]:
# Map of percent difference in peak flow, drawn from a spatialpandas GeoDataFrame.
# NOTE(review): `metrics_gdf` and `reference_time_slider` are not defined anywhere in the
# visible notebook, so this cell cannot run on a fresh kernel - confirm where they come from.
pn.extension(sizing_mode='scale_both')
# Reproject to Web Mercator (EPSG:3857); overwrites metrics_gdf, so re-running reprojects the already-reprojected frame.
metrics_gdf = metrics_gdf.to_crs("EPSG:3857")
sdf = spd.GeoDataFrame(metrics_gdf)
title = (f"Reference Time: {reference_time_slider[1].value_start}")
diff_map = sdf.hvplot.points(c='perc_diff', cmap=cc.CET_D1A[::-1], clim=(-100,100), width=800, height=400,
                             clabel="% Difference Peak Flow", title=title, size=5, xaxis = None, yaxis = None, tiles='OSM')
diff_map

### Create other basic plots to explore the data further, and link them together

In [None]:
# Linked map / scatter / histogram of the percent difference in peak flow.
# NOTE(review): this cell repeats the linked-selection cell earlier in the notebook verbatim;
# consider turning it into a function. It reads 'easting', 'northing' and the peak-metric
# columns from `df` - confirm `df` holds the metrics table at this point.
measure='perc_diff'
width = 700
basemap = osm2 = tiles.OSM()#.redim(x='easting', y='northing') #gv.tile_sources.CartoLight
points_hv = hv.Points(df, kdims=['easting','northing'], vdims=[measure, ('secondary_maximum','fcst_peak'), ('primary_maximum','obs_peak'),('primary_location_id','gage_id')])
points_hv.opts(width=width, height=400, color=hv.dim(measure), clim=(-100,100),
    cmap=cc.CET_D1A[::-1], size = 5, xaxis=None, yaxis=None, colorbar=True, tools=['hover'])

# Histogram of the measure and forecast-vs-observed peak scatter.
diff_hist = df.hvplot.hist(y=measure, width=width, bins=100, bin_range=(-100, 1000), height=200, xlabel='% Difference Peak Flow')
diff_scat = hv.Scatter(df, kdims=['secondary_maximum'], vdims=['primary_maximum','easting','northing',measure])
diff_scat.opts(alpha=0.2, width=400, height=400, xlabel='Forecast Peak', ylabel='Observed Peak')
# Link selections across the three views; the layout is displayed in two columns.
ls = hv.link_selections.instance()
ls((basemap*points_hv + diff_scat + diff_hist)).cols(2)

In [None]:
importlib.reload(du)  # pick up edits to dashboard_utils without restarting the kernel

# Timeseries query for the first evaluation scenario (medium range streamflow).
# `scenarios` and `reference_time_slider` are not defined in this notebook; the scenario
# list is `eval_scenarios` and the reference time widget is `reference_time_player`.
# NOTE(review): other cells call run_teehr_query with a single `scenario=` argument -
# confirm the helper still accepts the individual *_filepath keywords used here.
ts_df = du.run_teehr_query(
    query_type="timeseries",
    primary_filepath=eval_scenarios[0]["primary_filepath"],
    secondary_filepath=eval_scenarios[0]["secondary_filepath"],
    crosswalk_filepath=eval_scenarios[0]["crosswalk_filepath"],
    geometry_filepath=eval_scenarios[0]["geometry_filepath"],
    value_time_start=value_time_slider[1].value_start,
    value_time_end=value_time_slider[1].value_end,
    reference_time_single=reference_time_player.value,
    value_min=0,
    attribute_paths=attribute_paths,
    return_query=False,
)
display(ts_df.head())

### Add some additional attributes and generate different plots

normalize flows, add linked histogram of upstream area and/or ecoregion...

In [None]:
## Work on 3-way map.... add precip... add timeseries
#
# Shows the ability to quickly generate statistics based on the whole population with
# different filters, limits and groupings, while keeping the raw data at hand for
# time series plots.

pn.extension(sizing_mode='scale_both')

# Observed (primary) peak flow map.
prim_map = sdf.hvplot.points(
    c='primary_maximum',
    cmap=cc.CET_L8[::-1],
    cnorm='eq_hist',
    clim=(0, 15000),
    width=400,
    clabel="Peak Flow (cfs)",
    title=title,
    size=5,
    xaxis=None,
    yaxis=None,
    tiles='CartoLight',
)

# Forecast (secondary) peak flow map, styled identically.
sec_map = sdf.hvplot.points(
    c='secondary_maximum',
    cmap=cc.CET_L8[::-1],
    cnorm='eq_hist',
    clim=(0, 15000),
    width=400,
    clabel="Peak Flow (cfs)",
    title=title,
    size=5,
    xaxis=None,
    yaxis=None,
    tiles='CartoLight',
)

# Side-by-side: observed peaks, forecast peaks, and the percent-difference points on the basemap.
prim_map + sec_map + basemap*points_hv.opts(width=400, height=300)

To do:
build up 3-col explorer layout  ...add precip... add timeseries from prior notebook   
turn into a dashboard at end  
try other scatter layouts, find best for alt dashboard - decide between these two for demo (prob only time for 1 post event example)


In [None]:
pn.extension(sizing_mode='stretch_width')

# Skeleton of the 3-column explorer: reference-time controls on top, two rows of three
# placeholder map panels (background tiles only; the `tiles_background * points` overlays
# are not wired in yet), then two colored spacers reserving room for further plots.
layout = pn.Column(
    pn.Column(current_ref_time, reference_time_player),
    pn.Row(*(pn.panel(tiles_background, margin=0) for _ in range(3))),
    pn.Row(*(pn.panel(tiles_background, margin=0) for _ in range(3))),
    pn.Spacer(background='green', height=150, margin=0),
    pn.Spacer(background='red', height=150, margin=0),
)