## Post Event 1 - Explore Event Observed Data

In [None]:
%%capture
!pip install spatialpandas colormap colorcet duckdb streamz

In [None]:
# Make the repository root and the evaluation packages importable from this notebook's folder.
import sys
sys.path.insert(0, '../../')
sys.path.insert(0, '../../evaluation/')
sys.path.insert(0, '../../evaluation/queries/')

# NOTE(review): the cells below call `db1.*` and `importlib.reload(db1)`, but the only
# statement that would bind `db1` is the commented-out import on the next line.
# Confirm which module is intended before running the notebook.
#import post_event_dashboard_1 as db1
import temp_queries
from evaluation import utils, config
import importlib

# data handling
import duckdb as ddb
import pandas as pd
import panel as pn
import geopandas as gpd
import numpy as np
import pathlib
import xarray as xr
from datetime import datetime, timedelta
from typing import List

# plotting / geospatial visualization
import colorcet as cc 
import holoviews as hv
import geoviews as gv
import spatialpandas as spd
import datashader as ds
import cartopy.crs as ccrs
from shapely.geometry import Point
from holoviews.operation.datashader import rasterize
from bokeh.models import HoverTool, Range1d

# select the bokeh plotting backend for holoviews (logo display turned off)
hv.extension('bokeh', logo=False)

### Define static options (independent of interactive selections) 

#### All structures temporary until data models are finalized

In [None]:
# Reference polygon layers: file path and ID column name for each layer.
polygon_info = {
    "huc2_file": pathlib.Path("../data/HUC2_Simp01_RemSPac.geojson"),  # TEMPORARY, gaps/holes
    "huc2_header": "huc2",
    "huc10_file": pathlib.Path("../data/HUC10_Simp005_dd.geojson"),    # TEMPORARY, gaps/holes
    "huc10_header": "HUC10",
}

# Forcing data: parquet source plus the ID columns used to match data rows to HUC10 polygons.
forcing_info = {
    "source": config.FORCING_ANALYSIS_ASSIM_PARQUET,
    "data_location_id_header": "catchment_id",
    "geom_location_id_header": polygon_info["huc10_header"],
    "geom_file": polygon_info["huc10_file"],
}

# Streamflow observations: parquet source plus the ID columns used to match data rows to gages.
streamflow_info = {
    "source": config.USGS_PARQUET,
    "data_location_id_header": "usgs_site_code",
    "geom_location_id_header": "gage_id",
    # Multipolygons, some old, some holes, need to get outer boundary (explode, concave hull)
    "gage_basins_file": pathlib.Path("../data/gage_basins.geojson"),
    # TEMPORARY, until thresholds/recurr_ints are added to data models:
    # "recurrence_flows_file": pathlib.Path("../data/nwm_v21_recurrence_flows_17C.nc"),
    # TEMPORARY header of 2-yr flows in above recurrence flow file:
    # "high_flow_threshold": "2_0_year_recurrence_flow_17C",
}
# eventually include other characteristic info - mean upstream slope, %imperv, soils, etc.



### Read static data

In [None]:
# NOTE(review): `db1` is not bound by any active import in this notebook (the
# `post_event_dashboard_1` import is commented out), so this reload and every
# `db1.*` call raise NameError until the intended import is restored - confirm.
importlib.reload(db1)

# Static inputs are cached in the notebook namespace: each section below is
# skipped when its result already exists, so re-running this cell does not
# re-read the files.

# read in polygons associated with MAP data
if "polygons_gdf" not in locals():
    print('Reading polygons...')
    polygons_gdf = gpd.read_file(forcing_info['geom_file']).to_crs("EPSG:3857")
    # keep only the ID column and the geometry
    polygons_gdf = polygons_gdf[[forcing_info['geom_location_id_header'], 'geometry']]

# read in usgs points
if "points_gdf" not in locals():
    print('Reading usgs points...')
    points_gdf = db1.read_points(streamflow_info)
    #points_gdf = gpd.read_parquet(streamflow_info['gage_points_file'])

# TEMPORARY:  build crosswalk between points_gdf and polygons_gdf - i.e., for every point,
# which catchment it falls within; if more than one polygon contains the point, the first is used.
# Points that fall in no polygon keep NaN.
if 'catchment_id' not in points_gdf.columns:
    print('Building catchment-gage point crosswalk...')
    points_gdf['catchment_id'] = np.nan
    polygon_id_header = forcing_info['geom_location_id_header']
    for row_label, point in points_gdf['geometry'].items():
        # rebuild a 2D point from x/y so only planar coordinates are tested
        pnt = Point(point.x, point.y)
        catchment_containing_point = polygons_gdf[polygons_gdf.contains(pnt)]
        if not catchment_containing_point.empty:
            catchment_id = catchment_containing_point[polygon_id_header].iloc[0]
            points_gdf.loc[row_label, 'catchment_id'] = catchment_id

# TEMPORARY read gage_basins, add area to points_gdf
if "gage_basins_gdf" not in locals():
    print('Reading gage basins...')
    gage_basins_gdf = gpd.read_file(streamflow_info['gage_basins_file']).set_index('usgs_site_code')
    # ft2 -> km2 using 3.28 ft per m (the same factor is used when converting back elsewhere)
    gage_basins_gdf['AREA_KM2'] = gage_basins_gdf['AREA_FT2'] / (3.28**2) / (1000**2)
    points_gdf['upstr_area_km2'] = np.nan
    # only gages that have a basin polygon receive an area; the rest stay NaN
    ind_w_upstr_area = points_gdf[points_gdf['gage_id'].isin(gage_basins_gdf.index)].index
    points_gdf.loc[ind_w_upstr_area, 'upstr_area_km2'] = \
        gage_basins_gdf.loc[points_gdf.loc[ind_w_upstr_area, 'gage_id'], 'AREA_KM2'].to_numpy()

# TEMPORARY read in recurrence flows (if not already in memory - prevent annoying rereading)
# recurrence flows are in units of CFS
# if "recurrence_flows_df" not in locals():
#     print('Reading recurrence flows...')
#     recurrence_flows_ds = xr.open_dataset(streamflow_info['recurrence_flows_file'], engine="netcdf4")
#     recurrence_flows_df = recurrence_flows_ds.to_dataframe()


### Holoviews object definitions


In [None]:
def get_historical_chars_geo_element(
    data_info: dict,
    data_location_id_like_string: str, 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = None,       
    geom_gdf = gpd.GeoDataFrame(),    
    measure: str = None,
    measure_min_requested = None,
    measure_max_requested = None,
    opts = {},
) -> hv.Element:
    '''
    Query per-location values with geometry and return them as a map element.

    The data come from ``db1.get_historical_chars_with_geom``. The geometry type
    of the first returned row selects the element: ``gv.Polygons`` for polygons,
    ``hv.Points`` (keyed on easting/northing) for points.

    Parameters
    ----------
    data_info : dict
        Data-source description; the keys 'data_location_id_header' and
        'geom_location_id_header' are read here, and the dict is passed on to the query.
    data_location_id_like_string, start_value_time, end_value_time, variable_name, geom_gdf
        Passed through unchanged to ``db1.get_historical_chars_with_geom``.
    measure : str
        Column to map. 'max_recurr_int' and 'max_in_hr' are derived here from
        the 'max' column; any other value must already exist in the query result.
    measure_min_requested, measure_max_requested
        Optional inclusive bounds on ``measure``. ``None`` disables a bound;
        0 is a valid bound.
    opts : dict
        Plot options applied to the element.

    Returns
    -------
    hv.Element
        The map element, with the ``measure`` dimension range set to the
        min/max of the rows that were kept.

    Raises
    ------
    IndexError
        If no rows remain after filtering (the geometry type cannot be read).
    ValueError
        If the geometry type is neither 'Polygon' nor 'Point'.
    '''
    # get data with geometry
    data_gdf = db1.get_historical_chars_with_geom(
        data_info = data_info,
        data_location_id_like_string = data_location_id_like_string, 
        start_value_time = start_value_time,
        end_value_time = end_value_time,
        variable_name = variable_name,        
        geom_gdf = geom_gdf,               
    )   

    # if mapping the recurrence interval, add recurrence values
    if measure == 'max_recurr_int':
        # NOTE(review): `recurrence_flows_df` is read from the notebook namespace
        # rather than passed in; it must exist before this measure is requested.
        data_gdf = db1.add_recurrence_interval(data_gdf, recurrence_flows_df, flow_col_label = "max")   
    if measure == 'max_in_hr':
        # normalize by upstream area: km2 -> ft2, then ft -> in (x12) and per-second -> per-hour (x3600)
        # (assumes 'max' is in ft3/s - TODO confirm); rows without an area cannot be normalized
        data_gdf = data_gdf[data_gdf['upstr_area_km2'].notnull()].copy()
        data_gdf['max_in_hr'] = data_gdf['max'] / (data_gdf['upstr_area_km2']*(1000**2)*(3.28**2)) * 12 * 3600

    # subset data based on requested min/max (if any defined, e.g., only > 0 or other threshold)
    # compare against None so that a threshold of 0 is applied rather than ignored
    if measure_min_requested is not None:
        data_gdf = data_gdf[data_gdf[measure] >= measure_min_requested]
    if measure_max_requested is not None:
        data_gdf = data_gdf[data_gdf[measure] <= measure_max_requested]

    # find the actual min/max values of the extracted data for rescaling plots
    measure_min_in_dataset = data_gdf[measure].min()
    measure_max_in_dataset = data_gdf[measure].max()    

    #convert to spatialpandas object (required for inspect polygons function)
    data_sdf = spd.GeoDataFrame(data_gdf)   

    # check geometry type (taken from the first row only)
    geom_type = data_gdf.geometry.type.iloc[0]

    if geom_type == 'Polygon':    

        # declare polygon geoviews object           
        map_element_hv = gv.Polygons(
            data_sdf,
            crs=ccrs.GOOGLE_MERCATOR, 
            vdims=[measure, data_info['data_location_id_header']],
        )  
        map_element_hv.opts(**opts)           

    elif geom_type == 'Point':      
        # define data dimensions - more complex for points so plot linkages work
        non_measures = [data_info['geom_location_id_header'], data_info['data_location_id_header'], 
                        'geometry','units','latitude','longitude','easting','northing','nwm_feature_id','upstr_area_km2']
        # custom hover not working, limit to 'measure' column only - possibly make this an argument
        show_all_columns = False
        if show_all_columns:
            all_measures = data_sdf.columns[~data_gdf.columns.isin(non_measures)].to_list()    
        else:
            all_measures = [measure]

        # define dimensions - the mapped measure goes first among the value dimensions
        sorted_measures = [measure] + [m for m in all_measures if m != measure]
        vdims = sorted_measures + [data_info['data_location_id_header'], 'upstr_area_km2']
        kdims = ['easting','northing']
        all_cols_except_geom = vdims + kdims + ['latitude','longitude']

        # leave out geometry - easier to work with the data
        data_df = data_sdf.loc[:, all_cols_except_geom]

        # if mapping the recurrence interval, sort points so legend appears in order
        if measure == 'max_recurr_int':        
            data_df = data_df.sort_values(measure, ascending = False)     

        # declare points holoviews object   
        map_element_hv = hv.Points(
            data_df, 
            kdims = kdims, 
            vdims = vdims,
        )
        map_element_hv.opts(**opts, show_legend=False)        

        # tooltips = [('ID', '@usgs_site_code'),('Max Flow (cfs)', '@max')]  ###  custom tooltips is not working
        # hover = HoverTool(tooltips=tooltips)            
        # map_element_hv.opts(tools=[hover])

    else:
        # previously fell through to an UnboundLocalError at the return statement
        raise ValueError(f"Unsupported geometry type for mapping: {geom_type!r}")

    # reset the data range based on data in the current sample;
    # redim returns a new object, so the result must be kept.
    # Skipped when the min/max are undefined (e.g. all values missing).
    if pd.notna(measure_min_in_dataset) and pd.notna(measure_max_in_dataset):
        map_element_hv = map_element_hv.redim.range(
            **{measure: (measure_min_in_dataset, measure_max_in_dataset)}
        )

    return map_element_hv    

def get_catchment_polygon_hv(
    index: List[int],
    points_dmap: hv.DynamicMap,     
    points_info: dict = {},
    polygons_info: dict = {},
    points_gdf: gpd.GeoDataFrame = None,
    polygons_gdf: gpd.GeoDataFrame = None,   
    opts = {},
) -> hv.Element:
    '''
    Return the polygon whose ID is stored as 'catchment_id' for the selected point.

    Parameters
    ----------
    index : List[int]
        Positions of the selected points within ``points_dmap``; may be empty.
    points_dmap : hv.DynamicMap
        Map of points; the selected point ID is read from its
        ``points_info['data_location_id_header']`` dimension.
    points_info, polygons_info : dict
        ID column names; 'data_location_id_header' and 'geom_location_id_header'
        are read from ``points_info`` and 'geom_location_id_header' from
        ``polygons_info``.
    points_gdf : gpd.GeoDataFrame
        Points table holding the 'catchment_id' crosswalk column.
    polygons_gdf : gpd.GeoDataFrame
        Polygons to select from.
    opts : dict
        Plot options applied to the polygon element.

    Returns
    -------
    hv.Element
        ``gv.Polygons`` for the matching polygon. With no selection the polygon
        of the first point is returned with ``alpha=0`` (fully transparent).
    '''
    point_ids = points_dmap.dimension_values(points_info['data_location_id_header'])

    if len(index) > 0 and len(points_dmap.dimensions('value')) > 0:    
        point_id = point_ids[index][0]
    else:
        # nothing selected: still return a polygon element, but make it invisible
        # (a copy is made so the caller's opts dict is not modified)
        point_id = point_ids[0]
        opts = dict(opts, alpha=0)

    # point -> catchment via the crosswalk column, then catchment -> polygon row(s)
    polygon_id = points_gdf.loc[points_gdf[points_info['geom_location_id_header']] == point_id, 'catchment_id'].iloc[0]  
    polygon_id_header = polygons_info['geom_location_id_header']
    selected_polygon = polygons_gdf.loc[polygons_gdf[polygon_id_header] == polygon_id, :]

    # use the ID column named by the polygons_info argument (not a notebook global)
    polygon_hv = gv.Polygons(selected_polygon, crs=ccrs.GOOGLE_MERCATOR, vdims=[polygon_id_header])
    polygon_hv.opts(**opts)     

    return polygon_hv


def get_historical_timeseries_ts_element(
    index: List[int],
    points_dmap: hv.DynamicMap,    
    points_info: dict = {},
    polygons_info: dict = {},
    points_gdf: gpd.GeoDataFrame = None,
    polygons_gdf: gpd.GeoDataFrame = None,
    variable_name: str = "streamflow", 
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,   
    element_type = "curve",
    opts = {},
):
    '''
    Build the time-series overlay for the point selected on the map.

    Parameters
    ----------
    index : List[int]
        Positions of the selected points within ``points_dmap``; may be empty.
    points_dmap : hv.DynamicMap
        Map of points; the point ID and 'upstr_area_km2' are read from it.
    points_info, polygons_info : dict
        Data-source descriptions passed to ``db1.get_historical_timeseries``;
        their ID header keys are also read here.
    points_gdf : gpd.GeoDataFrame
        Points table holding the 'catchment_id' crosswalk column
        (used for "precipitation_flux" only).
    polygons_gdf : gpd.GeoDataFrame
        Not used by this function.
    variable_name : str
        "precipitation_flux" (bars plus cumulative curve for the catchment
        linked to the point) or "streamflow" (flow curve plus area-normalized
        curve for the point).
    start_value_time, end_value_time : pd.Timestamp
        Query window; also used to position the title text. Required when a
        point is selected.
    element_type
        Not used by this function.
    opts : dict
        Plot options applied to the bars/curves.

    Returns
    -------
    A holoviews overlay with the title hidden. When nothing is selected, a flat
    placeholder curve labelled "No Selection" is returned.

    Raises
    ------
    ValueError
        If a point is selected and ``variable_name`` is not one of the two
        supported values.
    '''
    text_opts = dict(text_align='left', text_font_size='10pt', text_color='#57504d', text_font_style='bold')

    # nothing selected: return a placeholder so the layout keeps its shape
    if not (len(index) > 0 and len(points_dmap.dimensions('value')) > 0):
        df = pd.DataFrame([[0,0],[1,0]], columns = ['Date','value'])
        curve = hv.Curve(df, "Date", "value").opts(**opts)
        text = hv.Text(0.01, 0.9, "No Selection").opts(**text_opts)
        return (curve * text).opts(show_title=False)

    point_id = points_dmap.dimension_values(points_info['data_location_id_header'])[index][0]

    # title text is placed 1% into the time window, truncated to the hour
    t = start_value_time + (end_value_time - start_value_time)*0.01
    text_time = t.replace(second=0, microsecond=0, minute=0)

    if variable_name == "precipitation_flux":
        polygon_id = points_gdf.loc[points_gdf[points_info['geom_location_id_header']] == point_id, 'catchment_id'].iloc[0]  
        title = f"{polygons_info['geom_location_id_header']}: {polygon_id} (Contains Gage: {point_id})"                 
        # query with the window passed in by the caller (previously read from a
        # notebook-level slider widget, which hid a dependency on global state)
        df = db1.get_historical_timeseries(
            data_info = polygons_info,
            data_location_id_like_string = polygon_id, 
            start_value_time = start_value_time,
            end_value_time = end_value_time,
            variable_name = variable_name,
        )            
        # bars use a string time axis, so the title x-position is formatted the same way
        df['value_time_str'] = df['value_time'].dt.strftime('%Y-%m-%d-%H')
        ymax_bars = max(df['value'].max()*1.1,1)
        df = df.rename(columns = {'value_cum':'Cumulative (in)'})  # work around to get correct label on secondary axis
        text_x = text_time.strftime('%Y-%m-%d-%H')
        text_y = ymax_bars*0.9    

        bars = hv.Bars(df, kdims = [('value_time_str','Date')], vdims = [('value', 'Precip Rate (in/hr)')])
        curve = hv.Curve(df, kdims = [("value_time_str", "Date")], vdims = [('Cumulative (in)', 'Precip (in)')])
        text = hv.Text(text_x, text_y, title).opts(**text_opts)

        bars.opts(**opts, fill_color = 'blue', line_color = None, ylim=(0, ymax_bars))
        curve.opts(**opts, color='orange', hooks=[db1.plot_secondary_bars_curve])

        ##  must control ylim of secondary axis in hook function!
        return (bars * curve * text).opts(show_title=False)

    if variable_name == "streamflow":
        title = f"Gage ID: {point_id}"
        # query with the window passed in by the caller (see note in the branch above)
        df = db1.get_historical_timeseries(
            data_info = points_info,
            data_location_id_like_string = point_id, 
            start_value_time = start_value_time,
            end_value_time = end_value_time,
            variable_name = variable_name,
        )           

        # normalize flow by upstream area: km2 -> ft2, then ft -> in (x12) and per-second -> per-hour (x3600)
        upstr_area_km2 = points_dmap.dimension_values('upstr_area_km2')[index][0]
        upstr_area_ft2 = upstr_area_km2*(1000**2)*(3.28**2)
        df["Norm. Flow (in/hr)"] = df['value']/upstr_area_ft2*12*3600
        ymax = max(df['value'].max()*1.1,1)
        text_x = text_time
        text_y = ymax*0.9       

        text = hv.Text(text_x, text_y, title).opts(**text_opts)
        curve_cfs = hv.Curve(df, ("value_time", "Date"), ("value", "Flow (ft3/s)")) 
        curve_in_hr = hv.Curve(df, ("value_time", "Date"), ("Norm. Flow (in/hr)", "Norm. Flow (in/hr)")) 

        curve_cfs.opts(**opts, color='blue', ylim=(0, ymax))
        # the normalized curve is drawn fully transparent (alpha=0) and carries the hook
        curve_in_hr.opts(**opts, color='orange', alpha=0, hooks=[db1.plot_secondary_curve_curve])

        return (curve_cfs * curve_in_hr * text).opts(show_title=False)  

    # previously fell through to an UnboundLocalError at the return statement
    raise ValueError(f"Unsupported variable_name: {variable_name!r}")

            
def get_aggregator(measure):
    '''
    Return the datashader mean reduction over the ``measure`` column.
    '''
    mean_reduction = ds.mean(measure)
    return mean_reduction

### Launch the Dashboard