## Post Event 1 - Explore Event Observed Data

In [None]:
%%capture
!pip install spatialpandas colormap colorcet duckdb

In [None]:
import sys
sys.path.insert(0, '../../')
sys.path.insert(0, '../../evaluation/')
sys.path.insert(0, '../../evaluation/queries/')

import duckdb as ddb
import pandas as pd
import panel as pn
import geopandas as gpd
import numpy as np
import pathlib
import xarray as xr

from datetime import datetime, timedelta
from evaluation import utils, config
import queries
from typing import List

import hvplot.pandas  # noqa
import holoviews as hv
from holoviews import streams
import geoviews as gv
import spatialpandas as spd
import datashader as ds
import cartopy.crs as ccrs
from holoviews.operation.datashader import (
    rasterize, shade, regrid, inspect_points
)
from holoviews.operation.datashader import (
    datashade, inspect_polygons
)
import colorcet as cc

### Static options (set once at start of session, independent of interactive selections) 

In [None]:
# Define configuration (data sources)
# Both values come from the project's `config` module.
forcing_source = config.FORCING_ANALYSIS_ASSIM_PARQUET
flow_source = config.USGS_PARQUET

# Source of recurrence flow magnitudes per location
recurrence_file = pathlib.Path("../data/nwm_v21_recurrence_flows_17C.nc")  ## temporary, put in config if keeping
# Name of the recurrence-flow column treated as the high-flow threshold.
# NOTE(review): the visible code never reads this variable; the same column
# name is hard-coded in get_historical_timeseries_chars_merged_gdf.
high_flow_threshold = "2_0_year_recurrence_flow_17C"

# gage upstream basin info - TEMPORARY, these boundaries are not good, have holes, etc.
# eventually include other characteristic info - mean upstream slope, %imperv, soils, etc.
gage_basin_info_file = pathlib.Path("../data/nwm_gage_basin_polygons.feather")

# source and header (resolution) of MAP polygons corresponding to data in 'forcing_source'
polygon_file = pathlib.Path("../data/HUC10_Simp005_dd.geojson")            ## temporary, eventually resolve which layer, how to simplify w/o gaps, 
polygon_id_header = "HUC10"                                                ## if/how to allow different MAP resolution...

# source of HUC2 polygons - for reference only in maps
huc2_file = pathlib.Path("../data/HUC2_Simp01_RemSPac.geojson")

# Units apply to all data
# english -> inches for precip, cfs for flow
# metric -> mm for precip, cms for flow
units = "english"


### Read static/independent data

In [None]:
# Load the static datasets once per session. Each guard checks the notebook
# namespace so that re-running this cell does not repeat the slow reads.

# read in usgs points (if not already in memory - prevent annoying rereading)
if "points_gdf" not in locals():
    points_gdf = utils.get_usgs_gages()
    points_gdf = points_gdf.to_crs("EPSG:3857")

# read gage_basins, calculate area
# The guard must test the name that is actually assigned ("gage_basins");
# testing any other name makes the feather file be re-read on every run.
if "gage_basins" not in locals():
    gage_basins = gpd.read_feather(gage_basin_info_file)
    gage_basins['area_m2'] = gage_basins.to_crs("EPSG:3857").geometry.area

# read in recurrence flows (if not already in memory - prevent annoying rereading)
if "df_recurrence" not in locals():
    ds_recurrence = xr.open_dataset(recurrence_file, engine="netcdf4")
    df_recurrence = ds_recurrence.to_dataframe()

# read in polygons (if not already in memory - prevent annoying rereading)
if "polygon_gdf" not in locals():
    polygon_gdf = gpd.read_file(polygon_file).to_crs("EPSG:3857")
    polygon_gdf = polygon_gdf[[polygon_id_header, "geometry"]]

# read HUC2 polygons and split every multi-part geometry into one row per
# part; each part row keeps the huc2/name attributes and index label of its
# parent feature
if "huc2_gdf" not in locals():
    huc2_gdf_mult = gpd.read_file(huc2_file)
    huc2_columns = ["huc2", "name", "geometry"]
    huc2_part_rows = []
    for idx, polys in huc2_gdf_mult.geometry.items():
        # single-part geometries expose no `.geoms`; treat them as one part
        for poly_part in getattr(polys, "geoms", [polys]):
            row = huc2_gdf_mult.loc[[idx], huc2_columns].copy()
            row["geometry"] = poly_part
            huc2_part_rows.append(row)
    # concatenate once instead of growing the frame inside the loop
    if huc2_part_rows:
        huc2_gdf = pd.concat(huc2_part_rows, axis=0)
    else:
        huc2_gdf = gpd.GeoDataFrame()

### Query-building

In [None]:
def get_historical_filters(
    source: str,
    location_id_header: str,
    location_id_string: str,
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    exclude_negative_values = True,
) -> List[dict]:
    '''
    Build filter portion of query to extract historical timeseries by region
    (portion of ID) and value_time range.

    Parameters
    ----------
    source : accepted for signature symmetry with the query builders; it is
        not used when building the filters
    location_id_header : name of the column holding the location ID
    location_id_string : leading portion of the ID to match (prefix match via
        "like"), or "all" to keep every row with a non-empty ID
    start_value_time, end_value_time : optional inclusive bounds on
        value_time; a bound of None adds no filter
    exclude_negative_values : when True, add a "value >= 0" filter

    Returns
    -------
    list of dicts, each with the keys "column", "operator" and "value"
    '''
    # location filter: prefix match, or "any non-empty ID" when "all" is requested
    if location_id_string != "all":
        location_filter = {
            "column": f"{location_id_header}",
            "operator": "like",
            "value": f"{location_id_string}%"
        }
    else:
        location_filter = {
            "column": f"{location_id_header}",
            "operator": "<>",
            "value": ""
        }
    filters = [location_filter]

    # optional inclusive time window; timestamps are passed on as strings
    if start_value_time is not None:
        filters.append(
            {
                "column": "value_time",
                "operator": ">=",
                "value": f"{start_value_time}"
            }
        )
    if end_value_time is not None:
        filters.append(
            {
                "column": "value_time",
                "operator": "<=",
                "value": f"{end_value_time}"
            }
        )

    if exclude_negative_values:
        filters.append(
            {
                "column": "value",
                "operator": ">=",
                "value": 0
            }
        )
    return filters


def get_historical_timeseries_query(
    source: str,
    location_id_header: str,
    filters: List[dict]
) -> str:
    '''
    Assemble the SQL text that selects every column of the parquet files
    under `source`, restricted by `filters` and ordered by location ID and
    then value_time.

    `filters` is rendered by queries.format_filters and the resulting
    conditions are combined with AND.
    '''
    where_clause = " AND ".join(queries.format_filters(filters))
    # one entry per line of the query text; the leading empty entry yields
    # the newline the query starts with
    query_lines = [
        "",
        "        SELECT ",
        "            *",
        f"        FROM read_parquet('{source}/*.parquet')",
        "        WHERE ",
        f"            {where_clause}",
        "        ORDER BY",
        f'            "{location_id_header}", value_time',
        "    ;",
    ]
    return "\n".join(query_lines)


def get_historical_timeseries_chars_query(
    source: str,
    group_by: List[str],
    order_by: List[str],
    filters: List[dict]
) -> str:
    '''
    Assemble the SQL text that summarizes `value` per group for the parquet
    files under `source`.

    For every combination of the `group_by` columns the query returns sum,
    max, min, mean and population variance of `value`, plus one
    measurement_unit (as `units`). Rows are restricted by `filters`
    (rendered by queries.format_filters, combined with AND) and the result
    is ordered by the `order_by` columns.
    '''
    group_cols = ",".join(group_by)
    order_cols = ",".join(order_by)
    where_clause = " AND ".join(queries.format_filters(filters))
    # one entry per line of the query text; the leading empty entry yields
    # the newline the query starts with
    query_lines = [
        "",
        "        SELECT ",
        f"            {group_cols},",
        "            sum(value) as sum,",
        "            max(value) as max,",
        "            min(value) as min,",
        "            mean(value) as mean,",
        "            var_pop(value) as variance,",
        "            any_value(measurement_unit) as units",
        f"        FROM read_parquet('{source}/*.parquet')",
        "        WHERE ",
        f"            {where_clause}",
        "        GROUP BY",
        f"            {group_cols}",
        "        ORDER BY ",
        f"            {order_cols}",
        "    ;",
    ]
    return "\n".join(query_lines)

### Query-running

In [None]:
def get_historical_timeseries(
    source: str,
    location_id_header: str,
    location_id_string: str,
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
) -> pd.DataFrame:
    '''
    Query the parquet fileset `source` with DuckDB and return the matching
    timeseries rows as a DataFrame.

    Rows are selected by location ID prefix (`location_id_string`, matched
    against column `location_id_header`) and by the optional value_time
    window; see get_historical_filters for the filter rules.
    '''
    # filters -> SQL text -> DataFrame
    timeseries_filters = get_historical_filters(
        source,
        location_id_header,
        location_id_string,
        start_value_time,
        end_value_time
    )
    sql = get_historical_timeseries_query(
        source,
        location_id_header,
        filters=timeseries_filters
    )
    return ddb.query(sql).to_df()


def get_historical_timeseries_chars(
    source: str,
    location_id_header: str,
    location_id_string: str,
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
) -> pd.DataFrame:
    '''
    Query the parquet fileset `source` with DuckDB and return one row of
    summary characteristics (sum, max, min, mean, variance, units) per
    location.

    Rows are selected by location ID prefix and by the optional value_time
    window; results are grouped and ordered by `location_id_header`.
    '''
    # filters -> SQL text -> DataFrame
    chars_filters = get_historical_filters(
        source,
        location_id_header,
        location_id_string,
        start_value_time,
        end_value_time,
    )
    id_columns = [location_id_header]
    sql = get_historical_timeseries_chars_query(
        source,
        group_by=id_columns,
        order_by=[location_id_header],
        filters=chars_filters
    )
    return ddb.query(sql).to_df()


def get_parquet_date_range(source) -> pd.Timestamp:
    '''
    Find the earliest and latest value_time stored in the parquet fileset
    `source`.

    Returns a 2-tuple (start_time, end_time) taken from the first row of
    the query result. NOTE(review): the return annotation names a single
    Timestamp although two values are returned.
    '''
    query_lines = [
        "",
        "        SELECT count(distinct(value_time)) as count,",
        "        min(value_time) as start_time,",
        "        max(value_time) as end_time",
        f"        FROM read_parquet('{source}/*.parquet')",
        "    ;",
    ]
    result = ddb.query("\n".join(query_lines)).to_df()
    earliest = result["start_time"][0]
    latest = result["end_time"][0]
    return earliest, latest

### Data transform/processing

In [None]:
def add_recurrence_interval(df_flow, df_recurr, flow_col_label = 'max'):
    '''
    Determine the highest defined recurrence interval flow that was exceeded
    by the flow in column `flow_col_label`.

    !!! currently assumes the column headers of `df_recurr` are of the format
        "X_0_year_recurrence_flow" and extracts the X value (as in nwm
        recurrence flow netcdf)

    Parameters
    ----------
    df_flow : frame with one row per location; must contain the columns
        'nwm_feature_id' and `flow_col_label`
    df_recurr : frame of recurrence flows indexed by feature id, one column
        per recurrence interval
    flow_col_label : name of the flow column compared against the
        recurrence flows

    Returns
    -------
    A copy of `df_flow` (the input is not modified) with
      * 'max_recurr_int' inserted right after `flow_col_label`: the largest
        interval (years) whose flow was exceeded, 0 if none was exceeded or
        the location has no recurrence data
      * 'qual' set to 'recurr_all_equal' for locations whose first and last
        recurrence flows are equal (likely bad freq. analysis results)
    '''
    col_label = 'max_recurr_int'

    # work on a copy so the caller's frame is not changed as a side effect
    df_out = df_flow.copy()
    feature_ids = df_out['nwm_feature_id']

    # recurrence flows aligned row-for-row with the flow frame; ids missing
    # from df_recurr become NaN rows, which never count as exceeded
    df_recurr_sub = df_recurr.reindex(feature_ids.to_numpy())
    recurr_flow_matrix = df_recurr_sub.to_numpy()

    # interval (years) encoded at the start of each column header
    recurr_vals = np.array(
        [int(label.split("_")[0]) for label in df_recurr_sub.columns]
    )

    # compare each location's flow against all of its recurrence flows;
    # broadcasting the flow column replaces explicit tiling
    flow_column = df_out[flow_col_label].to_numpy()[:, np.newaxis]
    exceed_recurr_flows = flow_column > recurr_flow_matrix

    # keep the interval value where exceeded (0 elsewhere), then take the
    # row-wise maximum: the highest tabulated recurrence interval exceeded
    # (i.e., the recurrence category in the High Water Magnitude Product)
    recurr_vals_matrix = np.where(exceed_recurr_flows, recurr_vals, 0)
    df_out[col_label] = recurr_vals_matrix.max(axis=1)

    # reorder columns to put max_recurr_int next to the flow column; dropping
    # it from the list first keeps this correct if the column already existed
    cols = [c for c in df_out.columns if c != col_label]
    i = cols.index(flow_col_label)
    df_out = df_out[cols[:i+1] + [col_label] + cols[i+1:]].copy()

    # add flag to indicate that all recurrence flows are equal
    # (i.e., 2-yr flow = 100-yr flow). df_recurr is indexed by feature id, so
    # the match is made on 'nwm_feature_id', not on the row index
    all_equal = (df_recurr.iloc[:, 0] == df_recurr.iloc[:, -1]).to_numpy()
    all_equal_ids = df_recurr.index[all_equal]
    flagged = df_out['nwm_feature_id'].isin(all_equal_ids)
    df_out.loc[flagged, 'qual'] = 'recurr_all_equal'

    return df_out


def convert_units(
    df: pd.DataFrame,
    variable_name: str,
    value_column: str,
    units: str,
) -> pd.DataFrame:
    '''
    Convert `value_column` of `df` to the requested unit system.

    Parameters
    ----------
    df : frame containing `value_column` and a "units" column; it is
        modified in place and also returned
    variable_name : "precipitation_flux" or "streamflow"; any other name
        leaves `df` untouched
    value_column : name of the column to convert
    units : "metric" or "english"

    The current units are read from the first row only, i.e. all rows are
    assumed to share the same units. An empty frame is returned unchanged.
    '''
    if variable_name not in ("precipitation_flux", "streamflow") or df.empty:
        return df

    # positional access: the index need not contain the label 0
    current_units = df["units"].iloc[0]

    # if forcing, convert from rate to depth, then check units
    if variable_name == "precipitation_flux":
        df[value_column] = (df[value_column]*60*60).round(2)
        if units == "metric" and "mm" not in current_units:
            df[value_column] = df[value_column] * 25.4
        elif units == "english" and "in" not in current_units:
            df[value_column] = df[value_column] / 25.4

    if variable_name == "streamflow":
        if units == "metric" and "m" not in current_units:
            df[value_column] = df[value_column] * 0.0283
        elif units == "english" and "ft" not in current_units:
            df[value_column] = df[value_column] / 0.0283

    return df

### Widgets definitions

In [None]:
def get_event_date_range_slider(source):
    '''
    Build the datetime range slider used to pick the event start and end.

    The slider spans the min/max value_time found in the parquet fileset
    `source`; the initial selection runs from the first time to 14 days
    after it.
    '''
    first_time, last_time = get_parquet_date_range(source)
    # default to start date plus 2 weeks
    initial_window = (first_time, first_time + timedelta(days = 14))
    # 1000*60*60: presumably one hour expressed in milliseconds - confirm
    # against the panel slider documentation
    slider_step = 1000*60*60
    return pn.widgets.DatetimeRangeSlider(
        name='Event start/end dates',
        start=first_time,
        end=last_time,
        value=initial_window,
        step=slider_step,
        bar_color = 'green',
        width_policy="fit",
    )

def get_huc_selector():
    '''
    Build the HUC2 region drop-down; choosing a single region instead of
    "all" enables a smaller area for faster responsiveness.

    Options are "all" followed by the zero-padded codes "01" through "18".
    '''
    region_codes = [f"{number:02d}" for number in range(1, 19)]
    return pn.widgets.Select(
        name='HUC2',
        options=["all"] + region_codes,
        value="all",
        width_policy="fit",
    )

def get_precip_measure_selector():
    '''
    Build the drop-down for the precipitation measure ("sum", "max" or
    "min"); the first option ("sum") is preselected.
    '''
    measure_options = ["sum", "max", "min"]
    return pn.widgets.Select(
        name='Measure',
        options=measure_options,
        value=measure_options[0],
        width_policy="fit",
    )

def get_flow_measure_selector():
    '''
    Build the drop-down for the streamflow measure ("sum", "max" or "min");
    the second option ("max") is preselected.
    '''
    measure_options = ["sum", "max", "min"]
    return pn.widgets.Select(
        name='Measure',
        options=measure_options,
        value=measure_options[1],
        width_policy="fit",
    )

### Utilities for plot style, background, colors

In [None]:
def get_precip_colormap():
    '''
    Build the custom precip colormap as a list of color strings.

    Composition, in order:
      1. five copies of the last sampled CET_L6 color, each with a
         two-character suffix appended ('00' .. '99', presumably alpha
         values - confirm)
      2. every third CET_L6 color from index 85 on, in reverse order
      3. the full CET_R1 palette
    '''
    sampled = list(cc.CET_L6[85:][::3])
    faded_head = [sampled[-1] + suffix for suffix in ('00', '10', '30', '60', '99')]
    reversed_sampled = list(reversed(sampled))
    return faded_head + reversed_sampled + cc.CET_R1

def get_recurr_colormap():
    '''
    Return the explicit interval -> color mapping for the 2, 5, 10, 25, 50
    and 100 year recurrence intervals (plus 0 for "none exceeded"), based
    on the OWP High Flow Magnitude product.
    '''
    intervals = (0, 2, 5, 10, 25, 50, 100)
    colors = (
        'lightgray',
        'dodgerblue',
        'yellow',
        'darkorange',
        'red',
        'fuchsia',
        'darkviolet',
    )
    return dict(zip(intervals, colors))
    
def get_basemap_gv(opts):
    '''
    Return the geoviews OSM tile source with the plot options in `opts`
    (a dict of keyword options) applied.
    '''
    return gv.tile_sources.OSM.opts(**opts)

### Data selection and organizing

In [None]:
def get_aggregator(measure):
    '''
    Return the datashader aggregator for `measure`: a mean reduction over
    the column of that name.
    '''
    aggregator = ds.mean(measure)
    return aggregator

def merge_df_with_gdf(
    gdf,
    geom_id_header: str,
    df,
    location_id_header: str,
) -> gpd.GeoDataFrame:
    '''
    Join the query result `df` onto the geometries in `gdf` and return the
    merged frame.

    The join matches `gdf[geom_id_header]` with `df[location_id_header]`.
    When the location header contains "HUC" or "huc", that column is
    converted to dtype 'category' (and a message is printed).
    '''
    merged_gdf = gdf.merge(df, left_on=geom_id_header, right_on=location_id_header)

    ids_are_huc_codes = "HUC" in location_id_header or "huc" in location_id_header
    if not ids_are_huc_codes:
        return merged_gdf

    print(f"converting column {location_id_header} to category")
    merged_gdf[location_id_header] = merged_gdf[location_id_header].astype("category")
    return merged_gdf

def subset_df_by_measure_min_max(
    df: pd.DataFrame,
    measure: str,
    measure_min: float = None,
    measure_max: float = None,
) -> pd.DataFrame:
    '''
    Return the rows of `df` whose `measure` value lies within the optional
    inclusive bounds.

    Parameters
    ----------
    df : input frame (never modified; the result is a new frame)
    measure : name of the column to filter on
    measure_min : keep rows with value >= measure_min; None means no lower
        bound
    measure_max : keep rows with value <= measure_max; None means no upper
        bound
    '''
    subset_df = df.copy()
    # compare against None explicitly so a bound of 0 is still applied
    if measure_min is not None:
        subset_df = subset_df[subset_df[measure] >= measure_min]
    if measure_max is not None:
        subset_df = subset_df[subset_df[measure] <= measure_max]

    return subset_df

In [None]:
def get_historical_timeseries_chars_merged_gdf(
    gdf,
    geom_id_header: str,
    source: str,
    location_id_header: str,
    location_id_string: str,
    start_value_time: pd.Timestamp = None,
    end_value_time: pd.Timestamp = None,
    variable_name = 'streamflow',
    measure: str = 'max',
    units = 'english',
    measure_min = None,
    measure_max = None,
) -> gpd.GeoDataFrame:
    '''
    Query per-location timeseries characteristics and attach them to the
    geometries in `gdf`.

    Steps:
      1. query the characteristics (sum, max, min, ...) from `source`
      2. convert the `measure` column to the requested `units`
      3. keep only rows whose `measure` lies within measure_min/measure_max
      4. merge with `gdf` (gdf[geom_id_header] == df[location_id_header])
      5. for streamflow only: add 'max_recurr_int' and the ratio of 'max' to
         the 2-year recurrence flow ('max_rel_to_2yr'), using the
         module-level `df_recurrence`

    NOTE(review): only the `measure` column is unit-converted, while the
    recurrence comparison always reads the 'max' column.
    '''
    chars_df = get_historical_timeseries_chars(
        source,
        location_id_header,
        location_id_string,
        start_value_time,
        end_value_time,
    )
    chars_df = convert_units(chars_df, variable_name, measure, units)
    chars_df = subset_df_by_measure_min_max(
        chars_df, measure, measure_min, measure_max
    )
    merged_gdf = merge_df_with_gdf(
        gdf, geom_id_header, chars_df, location_id_header
    )

    # the recurrence columns only apply to streamflow
    if variable_name != "streamflow":
        return merged_gdf

    two_year_column = "2_0_year_recurrence_flow_17C"
    merged_gdf = add_recurrence_interval(merged_gdf, df_recurrence, flow_col_label = 'max')
    merged_gdf = merged_gdf.merge(
        df_recurrence[[two_year_column]],
        right_on = "feature_id",
        left_on = "nwm_feature_id",
    )
    merged_gdf['max_rel_to_2yr'] = merged_gdf["max"]/merged_gdf[two_year_column]
    return merged_gdf

In [None]:


# Query the flow characteristics for every gage over the full available
# period and merge them with the gage points.
# NOTE(review): the date range is read from `forcing_source` while the flows
# are queried from `flow_source` - confirm both cover the same period.
start_date, end_date = get_parquet_date_range(forcing_source)
#event_dates_slider = get_event_date_range_slider(forcing_source)
# the two selector widgets are created here but not passed to the query below
huc_selector = get_huc_selector()
#precip_measure_selector = get_precip_measure_selector()
flow_measure_selector = get_flow_measure_selector()

# an empty location_id_string produces the prefix pattern "%" (see
# get_historical_filters), i.e. no restriction by location ID
merged_gdf = get_historical_timeseries_chars_merged_gdf(
    gdf = points_gdf,
    geom_id_header = "gage_id",
    source = flow_source,    
    location_id_header = "usgs_site_code",
    location_id_string = "", 
    start_value_time = start_date, #event_dates_slider.param.value_start,
    end_value_time = end_date, #event_dates_slider.param.value_end,
    variable_name = "streamflow",
    measure = "max", #flow_measure_selector.param.value,
    units = units,
    measure_min = 0.01,
    measure_max = None,
)

In [None]:
## This works
# Points colored by the 'max' measure (log color scale) stacked above a
# histogram of the same measure, both wrapped in one link_selections instance.

measure = 'max'
points_df = merged_gdf.loc[:,['longitude','latitude','gage_id',measure]]
points_hv = hv.Points(points_df, vdims=[measure])
points_hv.opts(width=700, color=hv.dim(measure), cmap='viridis_r', colorbar=True, cnorm='log')
hist = points_df.hvplot(width=700, y=measure, kind='hist', responsive=True, min_height=200)

# single-column layout: map on top, histogram below
ls = hv.link_selections.instance()
layout = (ls(points_hv + hist)).cols(1)
layout

In [None]:
## This works- switch to recurrence int
# Same layout as the 'max' version, colored by 'max_recurr_int' instead.
# NOTE(review): max_recurr_int is 0 where no recurrence flow was exceeded;
# confirm the log color normalization treats those points as intended.

measure = 'max_recurr_int'
points_df = merged_gdf.loc[:,['longitude','latitude','gage_id',measure]]
points_hv2 = hv.Points(points_df, vdims=[measure])
points_hv2.opts(width=700, color=hv.dim(measure), cmap='viridis_r', colorbar=True, cnorm='log')
hist2 = points_df.hvplot(width=700, y=measure, kind='hist', responsive=True, min_height=200)

ls = hv.link_selections.instance()
layout = (ls(points_hv2 + hist2)).cols(1)
layout

In [None]:
## This works - custom colors
# Uses the explicit interval -> color mapping from get_recurr_colormap().

measure = 'max_recurr_int'
points_df = merged_gdf.loc[:,['longitude','latitude','gage_id',measure]]
# ascending sort puts the largest intervals last in the frame
points_df = points_df.sort_values(measure, ascending = True)

points_hv3 = hv.Points(points_df, vdims=[measure])
points_hv3.opts(width=700, color=hv.dim(measure), cmap=get_recurr_colormap(), colorbar=True, size = 5)
peaks_hist3 = points_df.hvplot(width=700, y=measure, kind='hist', responsive=True, min_height=200)

ls = hv.link_selections.instance()
layout = (ls(points_hv3 + peaks_hist3)).cols(1)
layout

In [None]:
## This works - separate out zero point layer
# Points with max_recurr_int == 0 become their own small light-gray layer;
# points above 0 get the recurrence colors and feed the histogram.

measure = 'max_recurr_int'
points_df = merged_gdf.loc[:,['longitude','latitude','gage_id',measure]]
points_df = points_df.sort_values(measure, ascending = False)
points_sub_df = points_df[points_df[measure] > 0]
points_0_df = points_df[points_df[measure] == 0].copy()

points_sub_hv = hv.Points(points_sub_df, vdims=[measure])
points_sub_hv.opts(width=700, height=400, color=hv.dim(measure), cmap=get_recurr_colormap(), legend_position = 'bottom_right', size = 5)

points_0_hv = hv.Points(points_0_df, vdims=[measure])
points_0_hv.opts(width=700, color=hv.dim(measure), cmap=['lightgray'], show_legend = False, size = 2)

peaks_sub_hist = points_sub_df.hvplot(width=700, y=measure, kind='hist', responsive=True, min_height=200)

# overlay the two point layers (zero layer first), histogram below
ls = hv.link_selections.instance()
layout = (ls(points_0_hv * points_sub_hv + peaks_sub_hist)).cols(1)
layout

In [None]:
# Print the text representation of the current `layout` object
print(layout)

In [None]:
## This works but EXTREMELY slow (geodataframe instead of dataframe)
# Same plot as the zero-layer version, but the full merged GeoDataFrame is
# handed to holoviews instead of a plain column subset.

measure = 'max_recurr_int'
points_df = merged_gdf    #.loc[:,['longitude','latitude','gage_id',measure]]    
points_df = points_df.sort_values(measure, ascending = False)
points_sub_df = points_df[points_df[measure] > 0]
points_0_df = points_df[points_df[measure] == 0].copy()

points_sub_hv = hv.Points(points_sub_df, vdims=[measure])
points_sub_hv.opts(width=700, height=400, color=hv.dim(measure), cmap=get_recurr_colormap(), legend_position = 'bottom_right', size = 5)

points_0_hv = hv.Points(points_0_df, vdims=[measure])
points_0_hv.opts(width=700, color=hv.dim(measure), cmap=['lightgray'], show_legend = False, size = 2)

peaks_sub_hist = points_sub_df.hvplot(width=700, y=measure, kind='hist', responsive=True, min_height=200)

ls = hv.link_selections.instance()
layout = (ls(points_0_hv * points_sub_hv + peaks_sub_hist)).cols(1)
layout

In [None]:
# Print the text representation of the current `layout` object
print(layout)

In [None]:
# Print the text representation of the current `points_sub_hv` element
print(points_sub_hv)

In [None]:
## This works faster - convert to spatialpandas geodataframe
## layout shifts to the right for some reason

measure = 'max_recurr_int'

# convert to spatialpandas once and reuse the result; `points_df` stays bound
# to the unsorted conversion, `points_sdf` continues as the sorted frame
points_sdf = spd.GeoDataFrame(merged_gdf)
points_df = points_sdf
points_sdf = points_sdf.sort_values(measure, ascending = False)

# split into points above zero (colored) and zero points (gray background)
points_sub_sdf = points_sdf[points_sdf[measure] > 0]
points_0_sdf = points_sdf[points_sdf[measure] == 0].copy()

points_sub_hv = hv.Points(points_sub_sdf, vdims=[measure])
points_sub_hv.opts(width=700, height=400, color=hv.dim(measure), cmap=get_recurr_colormap(), legend_position = 'bottom_right', size = 5)

points_0_hv = hv.Points(points_0_sdf, vdims=[measure])
points_0_hv.opts(width=700, color=hv.dim(measure), cmap=['lightgray'], show_legend = False, size = 2)

# build histogram directly from points holoview obj
points_sub_hist = hv.operation.histogram(points_sub_hv, bin_range=(2, 100), dimension=measure)
points_sub_hist.opts(width=700)

ls = hv.link_selections.instance()
layout = (ls(points_0_hv * points_sub_hv + points_sub_hist)).cols(1)
layout

In [None]:
## This works faster - convert to spatialpandas geodataframe, use panel for layout

from holoviews.element import tiles
# basemap tile sources with their dimensions renamed to easting/northing
esri = tiles.ESRI().redim(x='easting', y='northing')
osm = tiles.OSM().redim(x='easting', y='northing')

measure = 'max_recurr_int'

# convert to spatialpandas once and reuse the result; `points_df` stays bound
# to the unsorted conversion, `points_sdf` continues as the sorted frame
points_sdf = spd.GeoDataFrame(merged_gdf)
points_df = points_sdf
points_sdf = points_sdf.sort_values(measure, ascending = False)

# split into points above zero (colored) and zero points (gray background)
points_sub_sdf = points_sdf[points_sdf[measure] > 0]
points_0_sdf = points_sdf[points_sdf[measure] == 0].copy()

points_sub_hv = hv.Points(points_sub_sdf, vdims=[measure])
points_sub_hv.opts(width=700, height=400, color=hv.dim(measure), cmap=get_recurr_colormap(), legend_position = 'bottom_right', size = 5)

points_0_hv = hv.Points(points_0_sdf, vdims=[measure])
points_0_hv.opts(width=700, color=hv.dim(measure), cmap=['lightgray'], show_legend = False, size = 2)

# build histogram directly from points holoview obj
points_sub_hist = hv.operation.histogram(points_sub_hv, bin_range=(2, 100), dimension=measure)
points_sub_hist.opts(width=700)

# basemap underneath both point layers, histogram below the map
ls = hv.link_selections.instance()
map_layout = (ls(osm * points_0_hv * points_sub_hv + points_sub_hist)).cols(1)

pn_layout = pn.Column(map_layout)
pn_layout

In [None]:
# Show the Python type of the current `points_sub_hv` object
type(points_sub_hv)

In [None]:
# Print the text representation of the current `points_sub_hv` element
print(points_sub_hv)

In [None]:
## This works - must use easting/northing to overlay dataframe with basemap

from holoviews.element import tiles
# basemap tile sources with their dimensions renamed to easting/northing
esri = tiles.ESRI().redim(x='easting', y='northing')
osm = tiles.OSM().redim(x='easting', y='northing')

measure = 'max_recurr_int'
# adds easting/northing columns to merged_gdf itself (taken from the point
# geometry), so the frame keeps them for later cells
merged_gdf['easting'] = merged_gdf.geometry.x
merged_gdf['northing'] = merged_gdf.geometry.y
points_df = merged_gdf.loc[:,['easting','northing','gage_id',measure]]
points_df = points_df.sort_values(measure, ascending = False)
points_sub_df = points_df[points_df[measure] > 0]
points_0_df = points_df[points_df[measure] == 0].copy()

# frame height is derived from the frame width with a 7:4 ratio
frame_width = 700
points_sub_hv = hv.Points(points_sub_df, vdims=[measure])
points_sub_hv.opts(
    frame_width=frame_width, frame_height = round(frame_width * 4/7), color=hv.dim(measure), 
    cmap=get_recurr_colormap(), legend_position = 'bottom_right', size = 5, xaxis=None, yaxis=None, toolbar='right')

points_0_hv = hv.Points(points_0_df, vdims=[measure])
points_0_hv.opts(color=hv.dim(measure), cmap=['lightgray'], show_legend = False, size = 2)

peaks_sub_hist = points_sub_df.hvplot(frame_width=frame_width, y=measure, kind='hist', responsive=True, min_height=200)

# basemap underneath both point layers, histogram below the map
ls = hv.link_selections.instance()
layout = (ls(osm * points_0_hv * points_sub_hv + peaks_sub_hist)).cols(1)
layout

In [None]:
# Show the Python type of the current `points_sub_hv` object
type(points_sub_hv)

In [None]:
# Print the text representation of the current `points_sub_hv` element
print(points_sub_hv)

In [None]:
# define dataset and select subset from dataset
#  cannot alter size with width and height... 

from holoviews.element import tiles
esri = tiles.ESRI().redim(x='easting', y='northing')
osm2 = tiles.OSM().redim(x='easting', y='northing')

width = 700

# key dimensions are longitude/latitude; the measures plus gage_id, easting
# and northing are carried as value dimensions
# (easting/northing must already exist as columns of merged_gdf)
measures = ['sum','max','min','mean','variance','max_recurr_int']
vdims = measures + ['gage_id','easting','northing']
cols = vdims + ['latitude','longitude']
kdims = ['longitude','latitude']

df = merged_gdf.loc[:,cols]
data = hv.Dataset(df, kdims, vdims)
# subsets selected by max_recurr_int: the range (1, 101) and the value 0
data_abv = data.select(max_recurr_int=(1,101))
data_0 = data.select(max_recurr_int=0)

In [None]:
# Points element built from the full dataset, colored by recurrence interval.
# NOTE(review): the color dimension comes from the global `measure` set in an
# earlier cell, not from the 'max_recurr_int' literal used in data.to().
points = data.to(hv.Points, ['longitude', 'latitude'], 'max_recurr_int')
points.opts(color=hv.dim(measure), cmap=get_recurr_colormap())

In [None]:
# Same as the full-dataset version, but built from the `data_abv` subset.
# NOTE(review): the color dimension comes from the global `measure` set in an
# earlier cell, not from the 'max_recurr_int' literal used in data_abv.to().
points = data_abv.to(hv.Points, ['longitude', 'latitude'], 'max_recurr_int')
points.opts(color=hv.dim(measure), cmap=get_recurr_colormap())

In [None]:
# Re-run the flow-characteristics query and rebuild `merged_gdf` from
# scratch; columns added to the previous merged_gdf by other cells
# (e.g. easting/northing) are not carried over.
# NOTE(review): the date range is read from `forcing_source` while the flows
# are queried from `flow_source` - confirm both cover the same period.
start_date, end_date = get_parquet_date_range(forcing_source)
#event_dates_slider = get_event_date_range_slider(forcing_source)
# the two selector widgets are created here but not passed to the query below
huc_selector = get_huc_selector()
#precip_measure_selector = get_precip_measure_selector()
flow_measure_selector = get_flow_measure_selector()

merged_gdf = get_historical_timeseries_chars_merged_gdf(
    gdf = points_gdf,
    geom_id_header = "gage_id",
    source = flow_source,    
    location_id_header = "usgs_site_code",
    location_id_string = "", 
    start_value_time = start_date, #event_dates_slider.param.value_start,
    end_value_time = end_date, #event_dates_slider.param.value_end,
    variable_name = "streamflow",
    measure = "max", #flow_measure_selector.param.value,
    units = units,
    measure_min = 0.01,
    measure_max = None,
)

In [None]:

# Add easting/northing columns taken from the point geometry.
# NOTE(review): `merged_sdf` is not assigned anywhere in the visible notebook,
# so this cell raises NameError unless it is defined elsewhere; the next cell
# performs the same assignment on `merged_gdf`.
merged_sdf['easting'] = merged_sdf.geometry.x
merged_sdf['northing'] = merged_sdf.geometry.y   

In [None]:
# Dashboard: header row (logo + title) above a row holding a max-vs-min
# scatter and the basemap/points/histogram column.
measure = 'max_recurr_int'
merged_gdf['easting'] = merged_gdf.geometry.x
merged_gdf['northing'] = merged_gdf.geometry.y
# NOTE(review): both column lists are derived from the `points_df` left over
# from an earlier cell (it is only reassigned two statements further down), so
# the selected columns depend on which cell ran last - confirm this is
# intended rather than merged_gdf.columns.
keepcols = points_df.columns[~points_df.columns.isin(['geometry'])]
vdimcols = points_df.columns[~points_df.columns.isin(['easting','northing', measure])]

points_df = merged_gdf.loc[:,keepcols]
points_df = points_df.sort_values(measure, ascending = False)
points_sub_df = points_df[points_df[measure] > 0]
points_0_df = points_df[points_df[measure] == 0].copy()

# NOTE(review): `width` below is the global assigned in another cell, not the
# width=500 passed on the line above it - confirm the intended height.
points_sub_hv2 = hv.Points(points_sub_df, vdims=([measure] + vdimcols.to_list()))
points_sub_hv2.opts(
    width=500, 
    height = round(width * 4/7), 
    color=hv.dim(measure),     
    #aspect = 7/4, responsive = True, 
    cmap=get_recurr_colormap(), 
#        legend_position = 'right', legend_offset=(0,0), 
        legend_position = 'bottom_right',
        size = 5, xaxis=None, yaxis=None, toolbar='right')

points_0_hv2 = hv.Points(points_0_df, vdims=([measure] + vdimcols.to_list()))
points_0_hv2.opts(color=hv.dim(measure), cmap=['lightgray'], show_legend = False, size = 2)

# bounding box of the non-zero points; used below to slice both point layers
emin, emax = points_sub_df['easting'].min(), points_sub_df['easting'].max()
nmin, nmax = points_sub_df['northing'].min(), points_sub_df['northing'].max()

peaks_sub_hist2 = points_sub_df.hvplot(y=measure, kind='hist', responsive=True, min_height=200)

df = merged_gdf[['gage_id','sum','max','min','mean','variance','max_recurr_int']]
scatter2 = hv.Scatter(df, 'max', 'min')
scatter2.opts(responsive = True, toolbar = 'above', size = 5)

pn.extension(sizing_mode='stretch_width')
ls = hv.link_selections.instance()
# `osm2` is the basemap created in the dataset-definition cell; scatter2 is
# placed in the layout outside the link_selections wrapper
layout_linked_hist = ((ls(osm2 * points_0_hv2[emin:emax, nmin:nmax] * points_sub_hv2[emin:emax, nmin:nmax] + peaks_sub_hist2)).cols(1))
layout = \
    pn.Column(
        pn.Row(
            pn.pane.PNG('https://ciroh.ua.edu/wp-content/uploads/2022/08/CIROHLogo_200x200.png', width=100),
            pn.pane.Markdown(
                """
                # CIROH Exploratory Evaluation Toolset
                ## Post-Event Observed Data Exploration
                """,
                width=800
            ),
        ),
        pn.Row(
            scatter2,
            layout_linked_hist,
            ),
    )
layout

In [None]:
# build hist and scat off of points
# A sum-vs-max scatter, the points map and a recurrence-interval histogram
# wrapped together in one link_selections instance.

measures = ['sum','max','min','mean','variance','max_recurr_int']
vdims = measures + ['gage_id','easting','northing']
cols = vdims + ['latitude','longitude']
kdims = ['longitude','latitude']

# easting/northing must already exist as columns of merged_gdf
df_ds = merged_gdf.loc[:,cols]
data = hv.Dataset(df_ds, kdims, vdims)

# NOTE(review): the color dimension comes from the global `measure` set in an
# earlier cell.
points = hv.Points(data, kdims = ['longitude','latitude'], vdims = ['max_recurr_int', 'sum','max','min','mean','variance'])
points.opts(width = 700, height=400, color=hv.dim(measure), cmap=get_recurr_colormap())

hist = hv.operation.histogram(points, bin_range=(2, 100), dimension = 'max_recurr_int')

scat = hv.Scatter(df_ds, kdims = ['sum'], vdims = ['max','max_recurr_int','latitude','longitude'])

pn.extension(sizing_mode='stretch_width')
ls = hv.link_selections.instance()
linked = ls(scat + points + hist)
linked.opts(toolbar='right')

layout = pn.Row(linked)
layout