## Post Event 1 - Explore Event Observed Data

In [None]:
%%capture
!pip install spatialpandas colormap colorcet duckdb

In [None]:
import sys
sys.path.insert(0, '../../')
sys.path.insert(0, '../../evaluation/')
sys.path.insert(0, '../../evaluation/queries/')

from evaluation import utils, config
import temp_queries
import temp_utils
import importlib

import duckdb as ddb
import pandas as pd
import panel as pn
import geopandas as gpd
import numpy as np
import pathlib
import xarray as xr
from datetime import datetime, timedelta
from typing import List

import colorcet as cc
#import hvplot.pandas  
import holoviews as hv
import geoviews as gv
import spatialpandas as spd
import datashader as ds
import cartopy.crs as ccrs
from shapely.geometry import Point
from holoviews.operation.datashader import rasterize
from holoviews.operation.datashader import inspect_polygons
from bokeh.models import HoverTool

hv.extension('bokeh', logo=False)

### Static options (set once at start of session, independent of interactive selections) 

In [None]:
# ---- Data sources (attributes of evaluation.config) ----
flow_source = config.USGS_PARQUET
forcing_source = config.FORCING_ANALYSIS_ASSIM_PARQUET

# ---- Headers of the location-id columns ----
# ... in the geometry layers
geom_location_id_header_points = "gage_id"
geom_location_id_header_polygons = "HUC10"     # eventually enable different MAP resolution...
# ... in the queried data
data_location_id_header_points = "usgs_site_code"
data_location_id_header_polygons = "catchment_id"

# ---- Geometry layers ----
# MAP polygons corresponding to the data in 'forcing_source'
# (temporary, eventually resolve which layer, how to simplify w/o gaps)
polygon_file = pathlib.Path("../data/HUC10_Simp005_dd.geojson")
# HUC2 polygons - for reference only in maps
huc2_file = pathlib.Path("../data/HUC2_Simp01_RemSPac.geojson")

# ---- TEMPORARY inputs, until this info is handled in the data model ----
# additional gage-basin metadata; eventually include other characteristic info
# (mean upstream slope, %imperv, soils, etc.).  These boundaries are not good, have holes, etc.
gage_basin_info_file = pathlib.Path("../data/nwm_gage_basin_polygons.feather")
# recurrence flows, until thresholds/recurr_ints are added to data models
recurrence_flows_file = pathlib.Path("../data/nwm_v21_recurrence_flows_17C.nc")
# header of the 2-yr flows in the recurrence flow file above
high_flow_threshold = "2_0_year_recurrence_flow_17C"

### Read static/independent data

In [None]:
# Every layer below is loaded only when its variable does not exist yet, so
# re-running this cell in a live session does not re-read the files.

# MAP polygons, reprojected to EPSG:3857; keep only the id column and geometry
if "polygons_gdf" not in locals():
    polygons_gdf = gpd.read_file(polygon_file).to_crs("EPSG:3857")
    polygons_gdf = polygons_gdf[[geom_location_id_header_polygons, 'geometry']]

# USGS gage points
if "points_gdf" not in locals():
    points_gdf = utils.get_usgs_gages()
    points_gdf = points_gdf.to_crs("EPSG:3857")
    # add easting and northing - helpful for plotting as points on basemap
    points_gdf['easting'] = points_gdf.geometry.x
    points_gdf['northing'] = points_gdf.geometry.y

    # TEMPORARY:  build crosswalk between points_gdf and polygons_gdf - i.e., for every point,
    # which catchment it falls within; if several polygons contain it, the first one is used.
    # The column is created with object dtype so polygon ids of any type can be stored next
    # to the NaN placeholders without an implicit dtype change.
    points_gdf['catchment_id'] = pd.Series(np.nan, index=points_gdf.index, dtype=object)
    for row_label, point in zip(points_gdf.index, points_gdf.geometry):
        catchment_containing_point = polygons_gdf[polygons_gdf.contains(point)]
        if not catchment_containing_point.empty:
            points_gdf.loc[row_label, 'catchment_id'] = (
                catchment_containing_point[geom_location_id_header_polygons].iloc[0]
            )

# read gage_basins, calculate area
# (the guard checks the name that is actually assigned, so the file is read only once)
if "gage_basins" not in locals():
    gage_basins = gpd.read_feather(gage_basin_info_file)
    # NOTE(review): area is computed in EPSG:3857 (Web Mercator), which is not an
    # equal-area projection - confirm whether an equal-area CRS is intended here.
    gage_basins['area_m2'] = gage_basins.to_crs("EPSG:3857").geometry.area

# read in recurrence flows (if not already in memory - prevent annoying rereading)
# recurrence flows are in units of CFS
if "recurrence_flows_df" not in locals():
    recurrence_flows_ds = xr.open_dataset(recurrence_flows_file, engine="netcdf4")
    recurrence_flows_df = recurrence_flows_ds.to_dataframe()

# HUC2 reference polygons: split multi-part geometries into one row per part.
# explode() leaves single-part geometries untouched and keeps the original row
# label on every part (index_parts=False), without concatenating inside a loop.
if "huc2_gdf" not in locals():
    huc2_gdf_mult = gpd.read_file(huc2_file)
    huc2_gdf = huc2_gdf_mult[['huc2', 'name', 'geometry']].explode(index_parts=False)

### Read and process data

In [None]:
def query_historical_data(   
    data_source: str, 
    data_location_id_header: str = data_location_id_header_points,
    data_location_id_like_string: str = "all", 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    data_type: str = "timeseries",          # 'timeseries' or 'chars'       
) -> pd.DataFrame:
    '''
    Run a DuckDB query to extract historical timeseries data
    or timeseries characteristics,
    filtered by location id pattern and value_time range.

    Parameters
    ----------
    data_source : source passed through to the temp_queries query builders
    data_location_id_header : header of the location id column in the data
    data_location_id_like_string : location id pattern passed to the filter builder
        (default "all")
    start_value_time, end_value_time : bounds passed to the filter builder
    data_type : 'timeseries' to build the timeseries query, 'chars' to build the
        characteristics query grouped and ordered by the location id

    Returns
    -------
    pd.DataFrame with the query result

    Raises
    ------
    ValueError if data_type is not 'timeseries' or 'chars'
    '''
    # fail early with a clear message instead of hitting an unbound 'query' below
    if data_type not in ('timeseries', 'chars'):
        raise ValueError(
            f"data_type must be 'timeseries' or 'chars', got {data_type!r}"
        )

    # build filters
    filters = temp_queries.get_historical_filters(
        data_source, 
        data_location_id_header, 
        data_location_id_like_string, 
        start_value_time, 
        end_value_time
    )
    # build query
    if data_type == 'timeseries':
        query = temp_queries.get_historical_timeseries_data_query(
            data_source, 
            data_location_id_header,
            filters=filters
        )
    else:
        query = temp_queries.get_historical_timeseries_chars_query(
            data_source, 
            group_by=[data_location_id_header],
            order_by=[data_location_id_header],
            filters=filters
        )
    # run query
    return ddb.query(query).to_df()

def get_historical_chars_with_geom(
    data_source: str,    
    data_location_id_header: str, 
    data_location_id_like_string: str, 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = None,      
    geom_gdf = gpd.GeoDataFrame(), 
    geom_id_header: str = None,       
) -> gpd.GeoDataFrame:
    '''
    Query timeseries characteristics, merge them with geometry and
    post-process by variable.

    - variable_name == "streamflow": keeps the 'max' measure and, when the
      module-level recurrence_flows_df is not empty, adds 'max_recurr_int'
      (flow units currently assumed cfs).
    - variable_name == "precipitation_flux": converts every measure returned by
      the query from mm/s to inches/hr and keeps the 'sum' measure.
    - any other variable_name (including None): keeps every measure returned by
      the query, unconverted.

    Parameters
    ----------
    data_source, data_location_id_header, data_location_id_like_string,
    start_value_time, end_value_time : passed through to query_historical_data
    variable_name : selects the post-processing described above
    geom_gdf : geometry layer merged with the query result
    geom_id_header : header of the location id column in geom_gdf

    Returns
    -------
    GeoDataFrame reduced to the id column, 'geometry', 'units' and the kept
    measures, plus 'latitude', 'longitude', 'easting', 'northing' when the first
    geometry is a Point.
    '''
    data_df = query_historical_data(
        data_source, 
        data_location_id_header = data_location_id_header, 
        data_location_id_like_string = data_location_id_like_string,
        start_value_time = start_value_time, 
        end_value_time = end_value_time,
        data_type = "chars"
    )          
    # merge with geodataframe (must do this before adding recurrence flows so have the nwm_feature_id)
    data_gdf = temp_utils.merge_df_with_gdf(
        geom_gdf, 
        geom_id_header, 
        data_df, 
        data_location_id_header
    )                    

    # stays None unless a variable-specific branch below picks the measures
    keep_measures = None

    # if streamflow, add recurrence flow levels of the peak flows, units currently assumed cfs
    if variable_name == "streamflow":
        keep_measures = ['max']
        if not recurrence_flows_df.empty:
            data_gdf = temp_utils.add_recurrence_interval(data_gdf, recurrence_flows_df, flow_col_label = "max")
            keep_measures = keep_measures + ['max_recurr_int']        

    # every calculated value/measure returned by the query
    all_measures = data_df.columns[~data_df.columns.isin([data_location_id_header, 'units'])].to_list() 

    # if precip, convert to inches/hr, currently in mm/s - for all measures
    if variable_name == "precipitation_flux":
        keep_measures = ['sum']        
        for col in all_measures:
            # NOTE(review): rounding happens on the mm/hr value, before the division
            # to inches - confirm this order is intended.
            data_gdf[col] = round(data_gdf[col]*60*60, 2)    
            data_gdf[col] = data_gdf[col] / 25.4     

    # no variable-specific selection: keep everything the query returned
    if keep_measures is None:
        keep_measures = all_measures

    # reduce columns as work around to custom hover tool not working in dynamicmap
    keep_cols = [data_location_id_header,'geometry','units'] + keep_measures
    # guard against an empty result before inspecting the first geometry
    if not data_gdf.empty and data_gdf.geom_type.values[0] == 'Point':
        keep_cols = keep_cols + ['latitude','longitude','easting','northing']
    data_gdf = data_gdf.loc[:,keep_cols]   

    return data_gdf


def get_historical_timeseries(
    data_source: str,    
    data_location_id_header: str, 
    data_location_id_like_string: str, 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = None,          
) -> pd.DataFrame:
    '''
    Query historical timeseries data and post-process it by variable.

    For variable_name == "precipitation_flux" the 'value' column is converted
    from mm/s to inches/hr and a cumulative column 'value_cum' is added. The
    cumulative sum is computed per location (when the location id column is
    present in the result) so that a query matching several locations does not
    accumulate across them. Rows are accumulated in the order returned by the
    query.

    Parameters
    ----------
    data_source, data_location_id_header, data_location_id_like_string,
    start_value_time, end_value_time : passed through to query_historical_data
    variable_name : selects the post-processing described above

    Returns
    -------
    pd.DataFrame with the (possibly converted) query result
    '''
    data_df = query_historical_data(
        data_source, 
        data_location_id_header = data_location_id_header, 
        data_location_id_like_string = data_location_id_like_string,
        start_value_time = start_value_time, 
        end_value_time = end_value_time,
        data_type = "timeseries"
    )          
    # if precip, convert values to inches/hr, currently in mm/s
    if variable_name == "precipitation_flux":
        data_df['value'] = round(data_df['value']*60*60, 2)    
        data_df['value'] = data_df['value'] / 25.4      
        if data_location_id_header in data_df.columns:
            # accumulate within each location; identical to a plain cumsum
            # when the result holds a single location
            data_df['value_cum'] = (
                data_df.groupby(data_location_id_header, sort=False)['value'].cumsum()
            )
        else:
            data_df['value_cum'] = data_df['value'].cumsum()

    return data_df

### Holoviews object definitions

In [None]:
def get_historical_chars_as_geo_element(
    data_source: str,     
    data_location_id_header: str, 
    data_location_id_like_string: str, 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = None,       
    geom_gdf = gpd.GeoDataFrame(), 
    geom_id_header: str = None,     
    measure: str = None,
    measure_min_requested = None,
    measure_max_requested = None,
) -> hv.Element:
    '''
    Build the map element (polygons or points) for one measure of the
    historical timeseries characteristics.

    Parameters
    ----------
    data_source, data_location_id_header, data_location_id_like_string,
    start_value_time, end_value_time, variable_name, geom_gdf, geom_id_header :
        passed through to get_historical_chars_with_geom
    measure : name of the measure column to map
    measure_min_requested, measure_max_requested : optional inclusive bounds;
        rows outside them are dropped. None disables a bound (0 is a valid bound).

    Returns
    -------
    gv.Polygons when the first geometry is a Polygon/MultiPolygon, hv.Points when
    it is a Point. The range of the `measure` dimension is set to the min/max of
    the returned rows.

    Raises
    ------
    ValueError if the first geometry is neither a (Multi)Polygon nor a Point
    '''
    # get data with geometry
    data_gdf = get_historical_chars_with_geom(
        data_source = data_source,
        data_location_id_header = data_location_id_header,
        data_location_id_like_string = data_location_id_like_string, 
        start_value_time = start_value_time,
        end_value_time = end_value_time,
        variable_name = variable_name,        
        geom_gdf = geom_gdf,
        geom_id_header = geom_id_header,               
    )
    # subset data based on requested min/max (if any defined, e.g., only >= 0 or other threshold)
    # compare against None so that a bound of 0 is not silently ignored
    if measure_min_requested is not None:
        data_gdf = data_gdf[data_gdf[measure] >= measure_min_requested]
    if measure_max_requested is not None:
        data_gdf = data_gdf[data_gdf[measure] <= measure_max_requested]

    # find the actual min/max values of the extracted data for rescaling plots
    measure_min_in_dataset = data_gdf[measure].min()
    measure_max_in_dataset = data_gdf[measure].max()    

    # convert to spatialpandas object (required for inspect polygons function)
    data_sdf = spd.GeoDataFrame(data_gdf)   

    # check geometry type (the first row is taken as representative of the layer)
    geom_type = data_gdf.geometry.type.iloc[0]

    if geom_type in ('Polygon', 'MultiPolygon'):    

        # declare polygon geoviews object           
        label = f"Mean Areal Precip | {start_value_time} | {end_value_time}"
        map_element_hv = gv.Polygons(
            data_sdf,
            crs=ccrs.GOOGLE_MERCATOR, 
            vdims=[measure, data_location_id_header],
            label = label,
        )    

    elif geom_type == 'Point':      
        # define data dimensions - more complex for points so plot linkages work
        non_measures = [geom_location_id_header_polygons, data_location_id_header, 
                        'geometry','units','latitude','longitude','easting','northing']
        all_measures = data_sdf.columns[~data_gdf.columns.isin(non_measures)].to_list()    

        # define dimensions - the mapped measure goes first among the value dimensions
        sorted_measures = [measure] + [m for m in all_measures if m!=measure]
        vdims = sorted_measures + [data_location_id_header]
        kdims = ['easting','northing']
        all_cols_except_geom = vdims + kdims + ['latitude','longitude']

        # leave out geometry - easier to work with the data
        data_df = data_sdf.loc[:,all_cols_except_geom]

        # if mapping the recurrence interval, sort points so legend appears in order
        if measure == 'max_recurr_int': 
            data_df = data_df.sort_values(measure, ascending = False)     

        # declare points holoviews object   
        label = f"{measure} | {start_value_time} | {end_value_time}"
        map_element_hv = hv.Points(
            data_df, 
            kdims = kdims, 
            vdims = vdims,
            label = label,
        )

        tooltips = [('ID', '@usgs_site_code'),('Max Flow (cfs)', '@max')]
        hover = HoverTool(tooltips=tooltips)            
        map_element_hv.opts(tools=[hover])

    else:
        raise ValueError(f"Unsupported geometry type for mapping: {geom_type}")

    # reset the data range based on data in the current sample;
    # redim returns a new element, so the result has to be kept.
    # Skipped when the measure has no valid values (min/max are NaN).
    if pd.notna(measure_min_in_dataset) and pd.notna(measure_max_in_dataset):
        map_element_hv = map_element_hv.redim.range(
            **{f"{measure}": (measure_min_in_dataset, measure_max_in_dataset)}
        )

    return map_element_hv    


def get_historical_timeseries_as_ts_element(
    index: List[int],
    points_dmap: hv.DynamicMap,
    variable_name: str = "streamflow", 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    element_type = "curve",
    opts = None,
):
    '''
    Build the timeseries element for the gage currently selected on the map.

    Parameters
    ----------
    index : indices of the selected points; only the first one is used
    points_dmap : DynamicMap of gage points the selection refers to
    variable_name : "streamflow" plots flow at the selected gage;
        "precipitation_flux" plots cumulative precip of the catchment
        that contains the selected gage
    start_value_time, end_value_time : time range of the query; when None,
        the current values of the module-level event_dates_slider are used
    element_type : "curve" or "bars" (used for precipitation only)
    opts : plot options applied to the element (default: none)

    Returns
    -------
    hv.Curve / hv.Bars, or None when nothing is selected, the selection does not
    match the current points, or the selected gage has no catchment.

    Raises
    ------
    ValueError for an unknown variable_name or element_type
    '''    
    opts = {} if opts is None else opts

    # nothing selected, or the points map holds no 'value' dimension
    if len(index) == 0 or len(points_dmap.dimensions('value')) == 0:
        return None

    # the selection can be stale after the points were re-queried
    point_ids = points_dmap.dimension_values(data_location_id_header_points)
    if index[0] >= len(point_ids):
        return None
    point_id = point_ids[index[0]]

    # fall back to the slider only when the caller did not supply a range
    if start_value_time is None:
        start_value_time = event_dates_slider.value_start
    if end_value_time is None:
        end_value_time = event_dates_slider.value_end

    if variable_name == "precipitation_flux":
        element_classes = {"curve": hv.Curve, "bars": hv.Bars}
        if element_type not in element_classes:
            raise ValueError(
                f"element_type must be 'curve' or 'bars', got {element_type!r}"
            )

        # catchment containing the selected gage (NaN when the gage is in no polygon)
        catchment_ids = points_gdf.loc[
            points_gdf[geom_location_id_header_points] == point_id, 'catchment_id'
        ]
        if catchment_ids.empty or pd.isna(catchment_ids.iloc[0]):
            return None
        polygon_id = catchment_ids.iloc[0]

        df = get_historical_timeseries(
            data_source = forcing_source,
            data_location_id_header = data_location_id_header_polygons, 
            data_location_id_like_string = polygon_id, 
            start_value_time = start_value_time,
            end_value_time = end_value_time,
            variable_name = variable_name,
        )            
        label = f"{geom_location_id_header_polygons}: {polygon_id}"  
        ts_element_hv = element_classes[element_type](
            df, ("value_time", "Date"), ("value_cum", "Cum. Precip (in)"), label=label
        )
        ts_element_hv.opts(**opts, xaxis = False, color="cyan")            

    elif variable_name == "streamflow":
        df = get_historical_timeseries(
            data_source = flow_source,
            data_location_id_header = data_location_id_header_points, 
            data_location_id_like_string = point_id, 
            start_value_time = start_value_time,
            end_value_time = end_value_time,
            variable_name = variable_name,
        ) 
        label = f"{geom_location_id_header_points}: {point_id}"
        ts_element_hv = hv.Curve(df, ("value_time", "Date"), ("value", "Flow (cfs)"), label=label)
        ts_element_hv.opts(**opts, color="blue")

    else:
        raise ValueError(
            f"variable_name must be 'streamflow' or 'precipitation_flux', got {variable_name!r}"
        )

    return ts_element_hv      

            
def get_aggregator(measure):
    '''
    Return the datashader mean reduction for `measure`
    (used as the aggregator when rasterizing).
    '''
    mean_reduction = ds.mean(measure)
    return mean_reduction

### Launch the Dashboard

In [None]:
# Dashboard assembly: widgets -> bound query functions -> DynamicMaps -> Panel layout.
# Statement order matters: the DynamicMaps must exist before the selection stream and
# the .opts() calls that reference them.

# pick up edits to the helper modules without restarting the kernel
importlib.reload(temp_queries)
importlib.reload(temp_utils)
pn.extension(sizing_mode='fixed') #'stretch_width')

# declare selection widgets - only dates this version
event_dates_slider = temp_utils.get_event_date_range_slider([forcing_source, flow_source])

# declare datashader aggregator (mean of the "sum" measure, see get_aggregator)
aggregator = pn.bind(get_aggregator, "sum")

# bind catchment geoviews to widgets (pulling CONUS wide, obs data does not warrant selection by HUC2)
# the element is rebuilt whenever either end of the date slider changes
catchments_bind = pn.bind(
    get_historical_chars_as_geo_element, 
    data_source = forcing_source,   
    data_location_id_header = data_location_id_header_polygons,    
    data_location_id_like_string = "all",     
    start_value_time = event_dates_slider.param.value_start,
    end_value_time = event_dates_slider.param.value_end,    
    variable_name = "precipitation_flux",  
    geom_gdf = polygons_gdf,
    geom_id_header = geom_location_id_header_polygons, 
    measure = "sum",
    measure_min_requested = 0,
)
# bind points dataframe to widgets  - !!! Add option to request list of gages within HUC2   
# only gages whose max recurrence interval is >= 1 are requested
points_bind = pn.bind(
    get_historical_chars_as_geo_element,
    data_source = flow_source,
    data_location_id_header = data_location_id_header_points,
    data_location_id_like_string = "all",     
    start_value_time = event_dates_slider.param.value_start,
    end_value_time = event_dates_slider.param.value_end,     
    variable_name = "streamflow",     
    geom_gdf = points_gdf,
    geom_id_header = geom_location_id_header_points,
    measure = "max_recurr_int",
    measure_min_requested = 1,
)

# Build background (static) map Elements - background tiles and all gage points for reference rasterized catchments DynamicMap
tiles_background = gv.tile_sources.OSM
points_background = hv.Points(points_gdf, kdims = ['easting','northing'], vdims = ['gage_id'])

# Build dynamic (changing) DynamicMaps - rasterized catchments (left map) and current high flow gage points (right map)
raster_catchments = rasterize(hv.DynamicMap(catchments_bind), aggregator=aggregator, precompute=True)
points_dmap = hv.DynamicMap(points_bind)

# Define stream source as selected gage points (starts with the first point selected)
selection_stream = hv.streams.Selection1D(source=points_dmap, index=[0])
    
# shared options of the two time-series plots below the maps
curve_opts = dict(width=600, height=200, toolbar = None, tools=["hover"])
    
# flow time series of the selected gage; rebuilt on selection or date change
flow_curve_bind = pn.bind(
    get_historical_timeseries_as_ts_element,
    index=selection_stream.param.index,
    points_dmap = points_dmap,
    variable_name="streamflow",
    start_value_time = event_dates_slider.param.value_start,
    end_value_time = event_dates_slider.param.value_end,   
    element_type = "curve",
    opts = curve_opts,
)
    
# cumulative precip of the catchment containing the selected gage
# NOTE(review): named "bars" but requests element_type "curve" - confirm which is intended
precip_bars_bind = pn.bind(
    get_historical_timeseries_as_ts_element,
    index=selection_stream.param.index,
    points_dmap = points_dmap,
    variable_name="precipitation_flux",
    start_value_time = event_dates_slider.param.value_start,
    end_value_time = event_dates_slider.param.value_end,
    element_type = "curve",
    opts = curve_opts,
)

# various plotting options
map_opts = dict(width=600, height=400, show_grid=False, xaxis = None, yaxis = None)

# return values of .opts() are not captured here; the calls are relied on to
# update the objects themselves
tiles_background.opts(**map_opts)
raster_catchments.opts(**map_opts, colorbar=True, cmap=temp_utils.get_precip_colormap(), clim=(1, 20), toolbar = 'right')
points_background.opts(**map_opts, color='lightgray', size=2, toolbar = 'right')
points_dmap.opts(**map_opts, tools=['hover','tap'], color=hv.dim("max_recurr_int"), cmap=temp_utils.get_recurr_colormap(), 
                 size = 5, legend_position = 'bottom_right', toolbar = 'right')


# page header: logo + title
header = pn.Row(
            pn.pane.PNG('https://ciroh.ua.edu/wp-content/uploads/2022/08/CIROHLogo_200x200.png', width=100),
            pn.pane.Markdown(
                """
                # CIROH Exploratory Evaluation Toolset
                ## Post-Event Observed Data Exploration
                """,
                width=800
            )
)
# Build the Panel layout
layout = \
    pn.Column(
        header,
        event_dates_slider,    
        # left map: rasterized catchment precip; right map: high-flow gage points
        (tiles_background * points_background * raster_catchments) + 
        (tiles_background * points_background * points_dmap),
        # time series of the selected gage: precip (left) and flow (right)
        pn.Row(precip_bars_bind, flow_curve_bind),
    )

# launch the layout
layout.servable()


## alter non-select alpha, add border on selected?
## accum precip to daily, add bar chart
## axis labels
## flow hover - worked around for now
## dot size on zoom in - too small to see
## switch to max/ave flow
## prevent ts plots from disappearing

In [None]:
# Debug cell: call the time-series builder directly (outside pn.bind) for the current
# selection and date range. A direct call is not resolved by Panel, so the current
# values are passed rather than the `.param.<name>` Parameter objects.
# Note that the result is a HoloViews element (or None), despite the name `df`.
df = get_historical_timeseries_as_ts_element(
    index=selection_stream.index,
    points_dmap = points_dmap,
    variable_name="precipitation_flux",
    start_value_time = event_dates_slider.value_start,
    end_value_time = event_dates_slider.value_end,
    element_type = "curve",
    opts = curve_opts)