## TEEHR Example 2 - Explore Forecast Data from a Recent Flood Event

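This notebook compares forecast streamflow and precipitation against observations for a recent flood event. (**TODO:** add a fuller description of this use case.)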





### Install and Import packages

In [None]:
%%capture
!pip install spatialpandas colormap colorcet duckdb
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@main'
#!pip install 'teehr @ git+https://[]@github.com/RTIInternational/teehr@39d6627e4f49b0bdeab3a4c4e8837e6ce5a15f78'

In [None]:
import teehr.queries.duckdb as tqd

# dashboard functions
import dashboard_utils as du
import importlib

from datetime import timedelta
from pathlib import Path
import geopandas as gpd
import pandas as pd
import spatialpandas as spd
import numpy as np
import pathlib
from typing import List
import duckdb as ddb

import hvplot
import hvplot.pandas
import holoviews as hv
from holoviews.element import tiles
import geoviews as gv
import panel as pn
import colorcet as cc
from holoviews.operation.datashader import rasterize, spread
# activate the bokeh plotting backend for HoloViews (logo=False: do not show the HoloViews logo)
hv.extension('bokeh', logo=False)

In [None]:
# evaluation study directory
STUDY_DIR = Path("/home", "jovyan", "shared", "rti-eval", "post-event-example")

# general units ("english" or "metric") to show in the visualizations
viz_units = "metric"

# Evaluation scenario definitions - the specific variables and configurations to be
# compared within the overall study. Each scenario is a dict with the same keys:
#   scenario_name, variable, primary_filepath, secondary_filepath,
#   crosswalk_filepath, geometry_filepath

# medium range streamflow forecast evaluation files
MRF_streamflow = dict(
    scenario_name="medium_range",
    variable="streamflow",
    primary_filepath=Path(STUDY_DIR, "timeseries", "usgs", "*.parquet"),
    secondary_filepath=Path(STUDY_DIR, "timeseries", "medium_range_mem1", "*.parquet"),
    crosswalk_filepath=Path(STUDY_DIR, "geo", "usgs_nwm22_crosswalk.parquet"),
    geometry_filepath=Path(STUDY_DIR, "geo", "usgs_geometry.parquet"),
)

# medium range precip forecast evaluation files
MRF_forcing = dict(
    scenario_name="medium_range",
    variable="precipitation",
    primary_filepath=Path(STUDY_DIR, "timeseries", "forcing_analysis_assim", "*.parquet"),
    secondary_filepath=Path(STUDY_DIR, "timeseries", "forcing_medium_range", "*.parquet"),
    # the primary and secondary are both HUC10
    crosswalk_filepath=Path(STUDY_DIR, "geo", "huc10_huc10_crosswalk.parquet"),
    geometry_filepath=Path(STUDY_DIR, "geo", "huc10_geometry.parquet"),
)

# short range streamflow forecast evaluation files
# (same observations, crosswalk and geometry as the medium range streamflow scenario)
SRF_streamflow = dict(
    scenario_name="short_range",
    variable="streamflow",
    primary_filepath=MRF_streamflow["primary_filepath"],
    secondary_filepath=Path(STUDY_DIR, "timeseries", "short_range", "*.parquet"),
    crosswalk_filepath=MRF_streamflow["crosswalk_filepath"],
    geometry_filepath=MRF_streamflow["geometry_filepath"],
)

# short range precip forecast evaluation files
# The crosswalk and geometry come from the medium range *forcing* scenario: the
# precipitation primary data is shared with MRF_forcing, so the HUC10 files apply
# here, not the USGS/NWM streamflow ones.
SRF_forcing = dict(
    scenario_name="short_range",
    variable="precipitation",
    primary_filepath=MRF_forcing["primary_filepath"],
    secondary_filepath=Path(STUDY_DIR, "timeseries", "forcing_short_range", "*.parquet"),
    crosswalk_filepath=MRF_forcing["crosswalk_filepath"],
    geometry_filepath=MRF_forcing["geometry_filepath"],
)

eval_scenarios = [MRF_streamflow, MRF_forcing, SRF_streamflow, SRF_forcing]

# attribute tables, keyed by attribute name, passed to the dashboard_utils helpers below
attribute_paths = dict(
    usgs_upstream_area=Path(STUDY_DIR, "geo", "usgs_attr_upstream_area.parquet"),
    usgs_ecoregions=Path(STUDY_DIR, "geo", "usgs_attr_ecoregions.parquet"),
    usgs_stream_order=Path(STUDY_DIR, "geo", "usgs_attr_stream_order.parquet"),
    usgs_huc_crosswalk=Path(STUDY_DIR, "geo", "usgs_huc12_crosswalk.parquet"),
    #nwm22_huc_crosswalk=Path(STUDY_DIR, "geo", "nwm22_huc12_crosswalk.parquet"),
    #UPSTREAM_IMPERVIOUS = Path(STUDY_DIR, "geo", "usgs_attr_upstream_imperv.parquet")    # don't have this data yet
)

## Select the scenario and date ranges

Next, we check the dates available in the parquet files and use sliders to select all or a portion of the total available period to evaluate.
(**TODO:** create a utility to check that the data are complete for all of the timeseries files defined above between the min/max dates.)

In [None]:
# pick up any edits made to dashboard_utils since it was imported
importlib.reload(du)

# scenarios explored in this notebook: medium range streamflow and precipitation
scenarios = [MRF_streamflow, MRF_forcing]

# date-filter widgets for value time and reference time, built from those scenarios
value_time_slider, reference_time_slider = du.get_filter_date_widgets(scenarios)

pn.Column(
    pn.Spacer(height=10),
    value_time_slider,
    reference_time_slider,
)

## Get the streamflow data
For an initial example, we will use a single reference time (start of slider) to explore the comparison between forecast and observed data. Later we will use this widget more interactively in a dashboard.

In [None]:
importlib.reload(du)

# Query peak-value metrics for the first scenario (medium range streamflow), limited to
# the selected value-time window and to the single reference time at the start of the
# reference-time slider. Results are grouped per location and reference time.
metrics_gdf = du.run_teehr_query(
    query_type="metrics",
    primary_filepath=scenarios[0]["primary_filepath"],
    secondary_filepath=scenarios[0]["secondary_filepath"],
    crosswalk_filepath=scenarios[0]["crosswalk_filepath"],
    geometry_filepath=scenarios[0]["geometry_filepath"],
    value_time_start=value_time_slider[1].value_start,
    value_time_end=value_time_slider[1].value_end,
    reference_time_single=reference_time_slider[1].value_start,
    value_min=0,
    include_metrics=["primary_maximum", "secondary_maximum", "max_value_delta"],
    group_by=["primary_location_id", "reference_time"],
    attribute_paths=attribute_paths,
)
# convert units if needed
metrics_gdf = du.convert_metrics_to_viz_units(metrics_gdf, viz_units)

# display a snippet (first rows only, not the whole table)
display(metrics_gdf.head())

In [None]:
# percent difference of the peak values: max_value_delta relative to the primary maximum
peak_ratio = metrics_gdf['max_value_delta'].div(metrics_gdf['primary_maximum'])
metrics_gdf['perc_diff'] = peak_ratio.mul(100)

# attach location attributes (expressed in the visualization units) to the metrics
# NOTE(review): re-running this cell merges the attributes a second time — confirm
# du.merge_attr_to_gdf tolerates that.
attr_df = du.combine_attributes(attribute_paths, viz_units)
metrics_gdf = du.merge_attr_to_gdf(metrics_gdf, attr_df)


In [None]:
# list the columns now available on the metrics table (after adding perc_diff and the attributes)
metrics_gdf.columns

### Create linked visualizations using holoviews
First, a simple map showing the percent difference in peak flow across the country for this single forecast.

In [None]:
# Linked map, scatter and histogram of the percent difference in peak flow,
# plotted from the metrics table built above.
measure = 'perc_diff'
width = 700

# Plain pandas copy of the metrics without the geometry column; point positions are
# taken from the 'easting'/'northing' columns instead.
# NOTE(review): assumes those two columns already exist on metrics_gdf — confirm.
metrics_df = pd.DataFrame(metrics_gdf.drop(columns=metrics_gdf.geometry.name))

basemap = osm2 = tiles.OSM()  #.redim(x='easting', y='northing') #gv.tile_sources.CartoLight

# map of locations coloured by the measure; the (column, label) pairs rename hover fields
points_hv = hv.Points(
    metrics_df,
    kdims=['easting', 'northing'],
    vdims=[
        measure,
        ('secondary_maximum', 'fcst_peak'),
        ('primary_maximum', 'obs_peak'),
        ('primary_location_id', 'gage_id'),
    ],
)
points_hv.opts(width=width, height=400, color=hv.dim(measure), clim=(-100, 100),
    cmap=cc.CET_D1A[::-1], size=5, xaxis=None, yaxis=None, colorbar=True, tools=['hover'])

# histogram of the measure and forecast-vs-observed peak scatter
diff_hist = metrics_df.hvplot.hist(y=measure, width=width, bins=100, bin_range=(-100, 1000), height=200, xlabel='% Difference Peak Flow')
diff_scat = hv.Scatter(metrics_df, kdims=['secondary_maximum'], vdims=['primary_maximum', 'easting', 'northing', measure])
diff_scat.opts(alpha=0.2, width=400, height=400, xlabel='Forecast Peak', ylabel='Observed Peak')

# link selections across the three plots and lay them out in two columns
ls = hv.link_selections.instance()
ls((basemap*points_hv + diff_scat + diff_hist)).cols(2)

In [None]:
pn.extension(sizing_mode='scale_both')

# reproject the metrics to EPSG:3857 and wrap them in a spatialpandas frame for plotting
metrics_gdf = metrics_gdf.to_crs("EPSG:3857")
sdf = spd.GeoDataFrame(metrics_gdf)

# the plot title records which reference time is shown
title = f"Reference Time: {reference_time_slider[1].value_start}"

# point map of the percent difference in peak flow over OSM tiles
diff_map_opts = dict(
    c='perc_diff',
    cmap=cc.CET_D1A[::-1],
    clim=(-100, 100),
    width=800,
    height=400,
    clabel="% Difference Peak Flow",
    title=title,
    size=5,
    xaxis=None,
    yaxis=None,
    tiles='OSM',
)
diff_map = sdf.hvplot.points(**diff_map_opts)
diff_map

### Create other basic plots to explore the data more.... link them to explore

In [None]:
# Linked map, scatter and histogram of the percent difference in peak flow,
# plotted from the metrics table built earlier in the notebook.
measure = 'perc_diff'
width = 700

# Plain pandas copy of the metrics without the geometry column; point positions are
# taken from the 'easting'/'northing' columns instead.
# NOTE(review): assumes those two columns already exist on metrics_gdf — confirm.
metrics_df = pd.DataFrame(metrics_gdf.drop(columns=metrics_gdf.geometry.name))

basemap = osm2 = tiles.OSM()  #.redim(x='easting', y='northing') #gv.tile_sources.CartoLight

# map of locations coloured by the measure; the (column, label) pairs rename hover fields
points_hv = hv.Points(
    metrics_df,
    kdims=['easting', 'northing'],
    vdims=[
        measure,
        ('secondary_maximum', 'fcst_peak'),
        ('primary_maximum', 'obs_peak'),
        ('primary_location_id', 'gage_id'),
    ],
)
points_hv.opts(width=width, height=400, color=hv.dim(measure), clim=(-100, 100),
    cmap=cc.CET_D1A[::-1], size=5, xaxis=None, yaxis=None, colorbar=True, tools=['hover'])

# histogram of the measure and forecast-vs-observed peak scatter
diff_hist = metrics_df.hvplot.hist(y=measure, width=width, bins=100, bin_range=(-100, 1000), height=200, xlabel='% Difference Peak Flow')
diff_scat = hv.Scatter(metrics_df, kdims=['secondary_maximum'], vdims=['primary_maximum', 'easting', 'northing', measure])
diff_scat.opts(alpha=0.2, width=400, height=400, xlabel='Forecast Peak', ylabel='Observed Peak')

# link selections across the three plots and lay them out in two columns
ls = hv.link_selections.instance()
ls((basemap*points_hv + diff_scat + diff_hist)).cols(2)

In [None]:
importlib.reload(du)

# same scenario, time window and reference time as the metrics query,
# but returning the timeseries themselves
scenario = scenarios[0]
value_time_range = value_time_slider[1]

ts_df = du.run_teehr_query(
    query_type="timeseries",
    primary_filepath=scenario["primary_filepath"],
    secondary_filepath=scenario["secondary_filepath"],
    crosswalk_filepath=scenario["crosswalk_filepath"],
    geometry_filepath=scenario["geometry_filepath"],
    value_time_start=value_time_range.value_start,
    value_time_end=value_time_range.value_end,
    reference_time_single=reference_time_slider[1].value_start,
    value_min=0,
    attribute_paths=attribute_paths,
    return_query=False,
)

# preview the first rows
display(ts_df.head())

### Add some additional attributes and generate different plots

normalize flows, add linked histogram of upstream area and/or ecoregion...

In [None]:
##  work on 3 way map.... add precip... add timeseries

# showing off the ability to quickly generate statistics based on the whole population with different filters, limits, groupings
# also having the raw data right there... for time series plots

pn.extension(sizing_mode='scale_both')

# Colorbar label follows the units chosen in viz_units instead of always saying "cfs".
# NOTE(review): assumes du.convert_metrics_to_viz_units yields m^3/s ("cms") for
# "metric" and ft^3/s ("cfs") otherwise — confirm.
flow_units = "cms" if viz_units == "metric" else "cfs"
peak_flow_label = f"Peak Flow ({flow_units})"

# options shared by the observed (primary) and forecast (secondary) peak-flow maps
# NOTE(review): the clim upper bound of 15000 was paired with the cfs label — revisit for metric units.
peak_map_opts = dict(cmap=cc.CET_L8[::-1], cnorm='eq_hist', clim=(0, 15000), width=400,
                     clabel=peak_flow_label, title=title, size=5, xaxis=None, yaxis=None, tiles='CartoLight')

prim_map = sdf.hvplot.points(c='primary_maximum', **peak_map_opts)
sec_map = sdf.hvplot.points(c='secondary_maximum', **peak_map_opts)

# observed peaks, forecast peaks, and the percent-difference points side by side
prim_map + sec_map + basemap*points_hv.opts(width=400, height=300)

To do:
build up 3-col explorer layout  ...add precip... add timeseries from prior notebook   
turn into a dashboard at end  
try other scatter layouts, find best for alt dashboard - decide between these two for demo (prob only time for 1 post event example)


In [None]:
# reload the dashboard helpers (`du` is the only helper module imported in this notebook)
importlib.reload(du)

# metric query wrapper
# Inputs come from the scenario definitions and the reference-time widget created earlier
# in the notebook, so the cell runs on a fresh kernel.
# NOTE(review): confirm dashboard_utils still exposes get_comparison_metrics with these
# keyword names — other cells use du.run_teehr_query instead.
gdf = du.get_comparison_metrics(
    primary_filepath=scenarios[0]["primary_filepath"],
    secondary_filepath=scenarios[0]["secondary_filepath"],
    crosswalk_filepath=scenarios[0]["crosswalk_filepath"],
    geometry_filepath=scenarios[0]["geometry_filepath"],
    single_reference_time=reference_time_slider[1].value_start,
    query_value_min=0,
)
gdf.head()

In [None]:
#  will need another wrapper around the above to generate the holoviews element, like in prior notebooks, but with new query wrapper structure

In [None]:
##  stuff below is remnants - reworking dashboards based on above...

In [None]:

## add normalized flow
# max_gdf['primary_ave_norm'] = merge_gdf['primary_maximum'] / merge_gdf['attribute_value'] * 3600 * 12
# max_gdf['secondary_ave_norm'] = merge_gdf['secondary_maximum'] / merge_gdf['attribute_value'] * 3600 * 12

# # subset data based on requested min/max (if any defined, e.g., only > 0 or other threshold)
# if measure_min_requested:
#     data_gdf = data_gdf[data_gdf[measure] >= measure_min_requested]
# if measure_max_requested:
#     data_gdf = data_gdf[data_gdf[measure] <= measure_max_requested]

In [None]:
######### General plotting options

# measure shown on the flow map, plus the titles of the flow and precipitation maps
flow_measure = "max_in_hr"
flow_map_title = "Normalized Event Peak Flow (in/hr)"
precip_map_title = "Total Precipitation (in)"

# colour options for the points: a dedicated colormap and legend for recurrence
# intervals, a reversed CET_L8 ramp with a colorbar for everything else
if flow_measure != "max_recurr_int":
    points_cmap_opts = {'cmap': cc.CET_L8[::-1], 'cnorm': 'eq_hist', 'colorbar': True}
else:
    # NOTE(review): `dbs` is not imported in the visible notebook — this branch needs it.
    points_cmap_opts = {'cmap': dbs.get_recurr_colormap(), 'legend_position': 'bottom_right'}

# shared options for map elements and for curve elements
map_opts = {'show_grid': False, 'xaxis': None, 'yaxis': None}
curve_opts = {'toolbar': None, 'tools': ["hover"], 'show_title': False}

In [None]:
######### Build components for the dashboard

# Static background map element: CartoLight tiles styled with the shared map options
# and a toolbar on the right. (The gage points intended as a reference layer for the
# rasterized catchments DynamicMap are not built here.)
tiles_background = gv.tile_sources.CartoLight.opts(toolbar='right', **map_opts)

In [None]:
#points_background 
# points_background = hv.Points(points_gdf, kdims = ['easting','northing'], vdims = ['id']).opts(color='lightgray', size=2, toolbar='right')
# points = spread(rasterize(points_background), px=4, shape='circle').opts(cmap=["lightgray"]) #, responsive=True)

In [None]:
pn.extension(sizing_mode='stretch_width')

# Placeholder dashboard layout: reference-time controls on top, two rows of three
# background-tile panels, then two coloured spacers reserving room for later content.
# NOTE(review): current_ref_time and reference_time_player are not defined in the
# visible notebook — this cell needs them before it can run.
layout = pn.Column(
    pn.Column(current_ref_time, reference_time_player),
    # the `tiles_background * points` overlays are disabled for now; plain tiles are shown instead
    pn.Row(*[pn.panel(tiles_background, margin=0) for _ in range(3)]),
    pn.Row(*[pn.panel(tiles_background, margin=0) for _ in range(3)]),
    pn.Spacer(background='green', height=150, margin=0),
    pn.Spacer(background='red', height=150, margin=0),
)

In [None]:
# gspec = pn.GridSpec(sizing_mode='stretch_width', width_policy='max', height=900)

# gspec[0,:] = pn.Column(current_ref_time, reference_time_player, margin=5)
# gspec[1:4,0] = pn.panel(tiles_background, margin=0)
# gspec[1:4,1] = pn.panel(tiles_background, margin=0)
# gspec[1:4,2] = pn.panel(tiles_background, margin=0)
# gspec[4:7,0] = pn.panel(tiles_background, margin=0)
# gspec[4:7,1] = pn.panel(tiles_background, margin=0)
# gspec[4:7,2] = pn.panel(tiles_background, margin=0)
# gspec[7,:] = pn.Spacer(background='green',  margin=0)
# gspec[8,:] = pn.Spacer(background='red',  margin=0)

# gspec