## Post Event 1 - Explore Event Observed Data

In [None]:
%%capture
!pip install spatialpandas colormap colorcet duckdb

In [None]:
import sys
sys.path.insert(0, '../../')
sys.path.insert(0, '../../evaluation/')
sys.path.insert(0, '../../evaluation/queries/')

from evaluation import utils, config
import temp_queries
import temp_data_utils
import temp_eval_dashboard_utils as temp_dash_utils
import importlib

import duckdb as ddb
import pandas as pd
import panel as pn
import geopandas as gpd
import numpy as np
import pathlib
import xarray as xr
from datetime import datetime, timedelta
from typing import List

import colorcet as cc
#import hvplot.pandas  
import holoviews as hv
import geoviews as gv
import spatialpandas as spd
import datashader as ds
import cartopy.crs as ccrs
from shapely.geometry import Point
from holoviews.operation.datashader import rasterize
from holoviews.operation.datashader import inspect_polygons

# Initialise the notebook extensions: HoloViews with the 'bokeh' backend
# (logo suppressed) and Panel with 'stretch_width' as the sizing mode.
hv.extension('bokeh', logo=False)
pn.extension(sizing_mode='stretch_width')

### Static options (set once at start of session, independent of interactive selections) 

In [None]:
# Define configuration (data sources)
forcing_source = config.FORCING_ANALYSIS_ASSIM_PARQUET
flow_source = config.USGS_PARQUET

# Source of recurrence flow magnitudes per location
recurrence_file = pathlib.Path("../data/nwm_v21_recurrence_flows_17C.nc")  ## temporary, put in config if keeping
high_flow_threshold = "2_0_year_recurrence_flow_17C"

# gage upstream basin info - TEMPORARY, these boundaries are not good, have holes, etc.
# eventually include other characteristic info - mean upstream slope, %imperv, soils, etc.
gage_basin_info_file = pathlib.Path("../data/nwm_gage_basin_polygons.feather")

# source and header (resolution) of MAP polygons corresponding to data in 'forcing_source'
polygon_file = pathlib.Path("../data/HUC10_Simp005_dd.geojson")            ## temporary, eventually resolve which layer, how to simplify w/o gaps, 
polygon_id_header = "HUC10"                                                ## if/how to allow different MAP resolution...

# source of HUC2 polygons - for reference only in maps
huc2_file = pathlib.Path("../data/HUC2_Simp01_RemSPac.geojson")

# Units apply to all data
# english -> inches for precip, cfs for flow
# metric -> mm for precip, cms for flow
current_units = "english"

# The timeseries callbacks and the dashboard cell read the unit system from the
# name `session_units`; define it here (as an alias of `current_units`) so those
# references resolve instead of raising NameError.
session_units = current_units


### Read static/independent data

In [None]:
# Each dataset below is only read when its variable is not already defined, so
# re-running this cell in a live session does not repeat the (slow) file reads.

# read in usgs points (if not already in memory - prevent annoying rereading)
if "points_gdf" not in locals():
    points_gdf = utils.get_usgs_gages()
    points_gdf = points_gdf.to_crs("EPSG:3857")
    # add easting and northing - helpful for plotting as points on basemap
    points_gdf['easting'] = points_gdf.geometry.x
    points_gdf['northing'] = points_gdf.geometry.y

# read gage_basins, calculate area (if not already in memory)
# NOTE: the guard has to test the name that is assigned below ("gage_basins");
# testing any other name makes the condition always true and forces a reread.
if "gage_basins" not in locals():
    gage_basins = gpd.read_feather(gage_basin_info_file)
    gage_basins['area_m2'] = gage_basins.to_crs("EPSG:3857").geometry.area

# read in recurrence flows (if not already in memory - prevent annoying rereading)
# recurrence flows are in units of CFS
if "recurrence_flows_df" not in locals():
    recurrence_flows_ds = xr.open_dataset(recurrence_file, engine="netcdf4")
    recurrence_flows_df = recurrence_flows_ds.to_dataframe()

# read in polygons (if not already in memory - prevent annoying rereading)
if "polygons_gdf" not in locals():
    polygons_gdf = gpd.read_file(polygon_file).to_crs("EPSG:3857")
    polygons_gdf = polygons_gdf[[polygon_id_header, "geometry"]]

# read in HUC2 polygons and split every multi-part geometry into one row per part
if "huc2_gdf" not in locals():
    huc2_gdf_mult = gpd.read_file(huc2_file)
    huc2_parts = []
    for i, polys in enumerate(huc2_gdf_mult.geometry):
        row = huc2_gdf_mult.loc[[i], ["huc2", "name", "geometry"]]
        # single-part geometries have no .geoms; treat them as a one-part collection
        for poly_part in getattr(polys, "geoms", [polys]):
            part_row = row.copy()
            part_row["geometry"] = poly_part
            huc2_parts.append(part_row)
    # concatenate once at the end rather than growing the frame inside the loop
    huc2_gdf = pd.concat(huc2_parts, axis=0) if huc2_parts else gpd.GeoDataFrame()

### Read and process data

In [None]:
def get_historical_timeseries_chars_with_geom(
    gdf, 
    geom_id_header: str, 
    source: str, 
    location_id_header: str, 
    location_id_string: str, 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = 'streamflow',    
    units = 'english',    
    measure: str = 'max',
    measure_min_allowable = None,
    measure_max_allowable = None,
    recurrence_flows_df: pd.DataFrame = None,
    high_flow_threshold: str = "2_0_year_recurrence_flow_17C",
) -> gpd.GeoDataFrame:
    '''
    Query timeseries characteristics and return them joined to their geometry.

    Steps, in order:
      1. query the characteristics with
         temp_dash_utils.get_historical_timeseries_chars
      2. convert every measure column to the requested units
      3. merge the result with ``gdf``
      4. for streamflow only, and only when a non-empty ``recurrence_flows_df``
         is supplied, add the recurrence interval of the 'max' column
         (adds the 'max_recurr_int' measure)
      5. subset the rows by ``measure`` using the min/max allowable values
      6. reduce to the id, geometry, units and measure columns (plus the
         latitude/longitude/easting/northing columns for point geometries)

    Parameters
    ----------
    gdf : geodataframe holding the geometries to merge with
    geom_id_header : id column of ``gdf`` used in the merge
    source : data source passed through to the query
    location_id_header : id column of the queried data
    location_id_string : location filter passed through to the query
    start_value_time, end_value_time : time window passed through to the query
    variable_name : variable being processed; 'streamflow' enables step 4
    units : unit system passed to temp_data_utils.convert_units
    measure : column used for the min/max subsetting
    measure_min_allowable, measure_max_allowable : subsetting limits (if any)
    recurrence_flows_df : recurrence flows table; None or empty skips step 4
    high_flow_threshold : accepted for interface compatibility; not used here

    Returns
    -------
    The subset geodataframe with the reduced set of columns.
    '''
    # query timeseries characteristics
    df = temp_dash_utils.get_historical_timeseries_chars(
        source, 
        location_id_header, 
        location_id_string, 
        start_value_time, 
        end_value_time,
    )
    # convert units if needed (every column except the id and units columns is a measure)
    all_measures = df.columns[~df.columns.isin([location_id_header, 'units'])].to_list()
    converted_df = temp_data_utils.convert_units(
        df, 
        variable_name,
        all_measures,
        units,
    )      
    # merge with geodataframe (must do this before adding recurrence flows so have the nwm_feature_id)
    merged_gdf = temp_data_utils.merge_df_with_gdf(
        gdf, 
        geom_id_header, 
        converted_df, 
        location_id_header
    )                
    # if streamflow, add recurrence flow levels of the peak flows
    # (recurrence_flows_df defaults to None, so check for that before .empty)
    has_recurrence_flows = recurrence_flows_df is not None and not recurrence_flows_df.empty
    if variable_name == "streamflow" and has_recurrence_flows:
        merged_gdf = temp_data_utils.add_recurrence_interval(merged_gdf, recurrence_flows_df, flow_col_label = 'max')
        all_measures = all_measures + ['max_recurr_int']        
    
    # subset data based on min/max (if any)
    subset_gdf = temp_data_utils.subset_df_by_measure_min_max(
        merged_gdf,
        measure,
        measure_min_allowable,
        measure_max_allowable,
    )
    # reduce columns
    keep_cols = [location_id_header,'geometry','units'] + all_measures
    point_cols = ['latitude','longitude','easting','northing']
    if subset_gdf.empty:
        # no rows left, so the geometry type cannot be read from the data;
        # keep whichever point coordinate columns the frame carries
        keep_cols = keep_cols + [col for col in point_cols if col in subset_gdf.columns]
    elif subset_gdf.geom_type.values[0] == 'Point':
        keep_cols = keep_cols + point_cols
    subset_gdf = subset_gdf.loc[:,keep_cols]    
        
    return subset_gdf

### Holoviews object definitions

In [None]:
def get_historical_timeseries_chars_polygons_hv(
    gdf, 
    geom_id_header: str, 
    source: str, 
    location_id_header: str, 
    location_id_string: str, 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = 'streamflow',  
    session_units = 'english',    
    measure: str = 'max',
    measure_min_allowable = None,
    measure_max_allowable = None,
) -> hv.Element:
    '''
    Build a geoviews Polygons element from queried timeseries characteristics.

    The data come from get_historical_timeseries_chars_with_geom (all query,
    unit and subsetting arguments are passed straight through; session_units
    is passed as its ``units`` argument).  The element carries ``measure`` and
    ``location_id_header`` as value dimensions, and the range of ``measure``
    is set to the min/max found in the returned data.
    '''
    # query + process the data and attach the polygon geometry
    query_kwargs = dict(
        gdf = gdf,
        geom_id_header = geom_id_header,
        source = source,
        location_id_header = location_id_header,
        location_id_string = location_id_string,
        start_value_time = start_value_time,
        end_value_time = end_value_time,
        variable_name = variable_name,
        units = session_units,
        measure = measure,
        measure_min_allowable = measure_min_allowable,
        measure_max_allowable = measure_max_allowable,
    )
    data_gdf = get_historical_timeseries_chars_with_geom(**query_kwargs)

    # value range of the measure within the current sample
    measure_values = data_gdf[measure]
    measure_range = (measure_values.min(), measure_values.max())

    # spatialpandas object is required for the inspect polygons function
    data_sdf = spd.GeoDataFrame(data_gdf)

    # declare the polygon geoviews object, then reset its data range
    # to match the data in the current sample
    polygons = gv.Polygons(
        data_sdf,
        crs=ccrs.GOOGLE_MERCATOR,
        vdims=[measure, location_id_header],
    )
    return polygons.redim.range(**{measure: measure_range})

def get_historical_timeseries_chars_points_hv(
    gdf, 
    geom_id_header: str, 
    source: str, 
    location_id_header: str, 
    location_id_string: str, 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = 'streamflow',
    session_units = 'english',    
    measure: str = 'max',
    measure_min_allowable = None,
    measure_max_allowable = None,
    recurrence_flows_df: pd.DataFrame = None,
    high_flow_threshold: str = "2_0_year_recurrence_flow_17C",
) -> hv.Element:
    '''
    Build a holoviews Points element from queried timeseries characteristics.

    The data come from get_historical_timeseries_chars_with_geom (all query,
    unit, subsetting and recurrence arguments are passed straight through;
    session_units is passed as its ``units`` argument).

    The element uses 'easting'/'northing' as key dimensions.  Its value
    dimensions are ``measure`` first, then every other measure column, then
    ``location_id_header``.  The geometry column is left out of the element's
    data.  Reads the module-level ``polygon_id_header`` to exclude that column
    from the measures.
    '''
    # get data with geometry
    merged_gdf = get_historical_timeseries_chars_with_geom(
        gdf = gdf,
        geom_id_header = geom_id_header,
        source = source,    
        location_id_header = location_id_header,
        location_id_string = location_id_string, 
        start_value_time = start_value_time,
        end_value_time = end_value_time,
        variable_name = variable_name,
        units = session_units,        
        measure = measure,
        measure_min_allowable = measure_min_allowable,
        measure_max_allowable = measure_max_allowable,
        recurrence_flows_df = recurrence_flows_df,
        high_flow_threshold = high_flow_threshold,
    )

    # define data dimensions so plot linkage work:
    # every column that is not an id / geometry / coordinate column is a measure
    non_measures = [polygon_id_header, location_id_header, 'geometry','units','latitude','longitude','easting','northing']
    all_measures = merged_gdf.columns[~merged_gdf.columns.isin(non_measures)].to_list()    
             
    # define dimensions (the mapped measure goes first)
    sorted_measures = [measure] + [m for m in all_measures if m != measure]
    vdims = sorted_measures + [location_id_header]
    kdims = ['easting','northing']
    cols = vdims + kdims + ['latitude','longitude']
    
    # leave out geometry - easier to work with the data
    merged_df = merged_gdf.loc[:,cols]

    # if mapping the recurrence interval, sort points so legend appears in order
    if measure == 'max_recurr_int': 
        merged_df = merged_df.sort_values(measure, ascending = False)     
    
    # declare points holoviews object   
    points_hv = hv.Points(
        merged_df, 
        kdims = kdims, 
        vdims = vdims,
    )            
    return points_hv

def get_flow_timeseries_data_selected_point(index):
    '''
    Return the flow timeseries curve for the currently selected gage point.

    Reads the module-level ``points_dmap``, ``event_dates_slider``,
    ``flow_source`` and ``session_units``.

    Parameters
    ----------
    index : sequence of selected point positions in ``points_dmap``; only the
        first selected point is used.

    Returns
    -------
    hv.Curve of 'value' against 'value_time' for the selected gage over the
    slider's date range, labelled with the gage id, the maximum value and the
    start time.  Returns None when nothing is selected or ``points_dmap`` has
    no value dimensions.
    '''
    # nothing selected (or no data dimensions on the map yet): nothing to plot
    if len(index) == 0 or len(points_dmap.dimensions('value')) == 0:
        return None

    gage_id = points_dmap.dimension_values('usgs_site_code')[index][0]
    df = temp_dash_utils.get_historical_timeseries_data(
        source = flow_source, 
        location_id_header = "usgs_site_code", 
        location_id_string = gage_id, 
        start_value_time = event_dates_slider.value_start,
        end_value_time = event_dates_slider.value_end,
    )
    # convert units - these are flow values, so convert them as streamflow
    # (not as precipitation)
    converted_df = temp_data_utils.convert_units(
        df, 
        variable_name = "streamflow",
        value_columns = ["value"],
        units = session_units,
    )      
    val_max = converted_df["value"].max()  
    label = f"usgs_site_code: {gage_id} | max: {val_max} | start_time: {event_dates_slider.value_start}"         

    # the label is set on construction; no separate relabel step is needed
    ts_curve_hv = hv.Curve(converted_df, "value_time", "value", label=label)

    return ts_curve_hv   

def get_precip_timeseries_data_selected_point(index):
    '''
    Return the precipitation timeseries curve for the polygon that contains
    the currently selected gage point.

    Reads the module-level ``points_dmap``, ``polygons_gdf``,
    ``polygon_id_header``, ``event_dates_slider``, ``forcing_source`` and
    ``session_units``.

    Parameters
    ----------
    index : sequence of selected point positions in ``points_dmap``; only the
        first selected point is used.

    Returns
    -------
    hv.Curve of 'value' against 'value_time'.  When nothing is selected,
    ``points_dmap`` has no value dimensions, or the selected point does not
    fall inside any polygon, an all-NaN placeholder curve labelled "NO DATA"
    is returned instead.
    '''
    # locate the polygon containing the selected point (if any)
    catchment_id = None
    if len(index) > 0 and len(points_dmap.dimensions('value')) > 0:      
        x = points_dmap.dimension_values('easting')[index][0]
        y = points_dmap.dimension_values('northing')[index][0]
        catchment = polygons_gdf[polygons_gdf.contains(Point(x, y))]
        # the point may fall in a gap between polygons - then there is no match
        if not catchment.empty:
            catchment_id = catchment[polygon_id_header].iloc[0]

    if catchment_id is not None:
        # get the data
        df = temp_dash_utils.get_historical_timeseries_data(
            source = forcing_source, 
            location_id_header = "catchment_id", 
            location_id_string = catchment_id, 
            start_value_time = event_dates_slider.value_start,
            end_value_time = event_dates_slider.value_end,
        )
        # convert units
        converted_df = temp_data_utils.convert_units(
            df, 
            variable_name = "precipitation_flux",
            value_columns = ["value"],
            units = session_units,
        )      
        val_max = converted_df["value"].max()  
        label = f"{polygon_id_header}: {catchment_id}"
        
    else:
        # placeholder data so a curve element can still be returned
        converted_df = pd.DataFrame(np.full((1,2), np.nan), columns = ['value','value_time'], index = ['None'])
        label = f"{polygon_id_header}: None | NO DATA"
        val_max = np.nan

    ts_curve_hv = hv.Curve(converted_df, "value_time", "value", label=label)

    # trying to get plot limits to reset, not working
    ts_curve_hv.opts.clear()                                                 
    ts_curve_hv.opts(tools=["hover"], color="blue", ylim=(0, val_max*1.25))
    ts_curve_hv = ts_curve_hv.redim.range(value=(0, val_max))          
    
    return ts_curve_hv

def get_basemap_gv(opts):
    '''
    Return the geoviews OSM tile source with the given plot options applied.

    opts : dict of option keywords, unpacked into the tile source's .opts() call
    '''
    osm_source = gv.tile_sources.OSM
    return osm_source.opts(**opts)

def get_aggregator(measure):
    '''
    Return a datashader mean reduction over the column named by `measure`.
    '''
    mean_reduction = ds.mean(measure)
    return mean_reduction

### Launch the Dashboard

In [None]:
# Dashboard assembly: declare the date-range widget, bind the map-building
# functions to it, wrap the results in DynamicMaps, wire the point-selection
# stream to the flow timeseries callback, and lay everything out with Panel.
# NOTE(review): `session_units` is read below and must already be defined by an
# earlier cell (the static-options cell sets the unit system) - confirm it is
# in scope before running this cell.

# reload the helper modules so that edits to them are picked up in this session
importlib.reload(temp_queries)
importlib.reload(temp_data_utils)
importlib.reload(temp_dash_utils)

# Define recurring view options
opts = dict(width=600, height=600, show_grid=False)

# declare selection widgets - only dates this version
event_dates_slider = temp_dash_utils.get_event_date_range_slider(forcing_source)

# declare datashader aggregator for precip
# (get_aggregator returns ds.mean over the named column, here the "sum" measure)
aggregator = pn.bind(get_aggregator, "sum")

# bind catchment geoviews to widgets
# (the slider's start/end parameters are passed so the bound function depends on them)
precip_measure = "sum"
catchments_bind = pn.bind(
    get_historical_timeseries_chars_polygons_hv, 
    gdf = polygons_gdf,
    geom_id_header = "HUC10",
    source = forcing_source,    
    location_id_header = "catchment_id",
    location_id_string = "all", 
    start_value_time = event_dates_slider.param.value_start,
    end_value_time = event_dates_slider.param.value_end,
    variable_name = "precipitation_flux",
    session_units = session_units,    
    measure = precip_measure,
    measure_min_allowable = 0,
)
# bind points dataframe to widgets  - !!! Add option to request list of gages within HUC2
# the color options depend on which flow measure is mapped
flow_measure = "max_recurr_int"
if flow_measure == "max_recurr_int":
    flow_cmap = temp_dash_utils.get_recurr_colormap()
    flow_color_opts = dict(cmap=flow_cmap, legend_position = 'bottom_right')
else:
    flow_cmap = "viridis_r"
    flow_color_opts = dict(cmap=flow_cmap, colorbar = True, cnorm = 'log')

points_bind = pn.bind(
    get_historical_timeseries_chars_points_hv,
    gdf = points_gdf,
    geom_id_header = "gage_id",
    source = flow_source,    
    location_id_header = "usgs_site_code",
    location_id_string = "", 
    start_value_time = event_dates_slider.param.value_start,
    end_value_time = event_dates_slider.param.value_end,
    variable_name = "streamflow",
    session_units = session_units,    
    measure = flow_measure,
    measure_min_allowable = 1,
    recurrence_flows_df = recurrence_flows_df,
    high_flow_threshold = high_flow_threshold,
)

# Get rasterized catchments DynamicMap
raster_catchments = rasterize(hv.DynamicMap(catchments_bind), 
                              aggregator=aggregator, precompute=True).opts(**opts, colorbar=True, cmap=temp_dash_utils.get_precip_colormap(), clim=(1, 20))

# Get background gage points Element (static)
points_background = hv.Points(points_gdf, kdims = ['easting','northing'], vdims = ['gage_id']).opts(color='lightgray', size=2)

# Get gage points DynamicMap
# (points are colored by the selected flow measure)
points_dmap = hv.DynamicMap(points_bind).opts(**opts,
    color=hv.dim(flow_measure), size=5, **flow_color_opts)

# Define stream source and type to select gage points
selection_stream = hv.streams.Selection1D(source=points_dmap)#, index[0])
 
# bind the flow timeseries callback to the stream's selected index
flow_curve_bind = pn.bind(
    get_flow_timeseries_data_selected_point,
    index=selection_stream.param.index
)
    
# Get flow timeseries DynamicMap
#flow_curve_dmap = hv.DynamicMap(get_flow_timeseries_data_selected_point, streams=[selection_stream])

# Get precip timeseries DynamicMap
# NOTE: this DynamicMap is created but is not placed in the layout below
# (the layout entries that reference it are commented out)
precip_curve_dmap = hv.DynamicMap(get_precip_timeseries_data_selected_point, streams=[selection_stream])

# Build the Panel layout
# rows, top to bottom: logo + title, date slider, the two maps side by side
# (precip raster | gage points), then the flow timeseries for the selected gage
layout = \
    pn.Column(
        pn.Row(
            pn.pane.PNG('https://ciroh.ua.edu/wp-content/uploads/2022/08/CIROHLogo_200x200.png', width=100),
            pn.pane.Markdown(
                """
                # CIROH Exploratory Evaluation Toolset
                ## Post-Event Observed Data Exploration
                """,
                width=800
            )
        ),
        event_dates_slider,    
        (hv.DynamicMap(get_basemap_gv(opts)) * points_background * raster_catchments) + 
        (hv.DynamicMap(get_basemap_gv(opts)) * points_background * points_dmap.opts(tools=['hover','tap'])),
        flow_curve_bind,
        # precip_curve_dmap.opts(width=600) + 
        # flow_curve_dmap.opts(width=600),
        #flow_curve_dmap + 
        #precip_curve_dmap,
    )

# launch the layout
layout.servable()

In [None]:
# Manual check of the flow timeseries callback outside the dashboard: request
# the curve for the point at position 9.  The callback reads points_dmap and
# event_dates_slider, so the dashboard cell must have been run first.
curve_hv = get_flow_timeseries_data_selected_point(index=[9])