In [None]:
#configuration
import os
import sys

# Location of the study data sets, relative to this notebook.
DATA_DIR = "../hydro_dash/data/studies"

# Make the project packages importable from the notebook. The entries are
# prepended in one step; the most specific directory ends up first on the
# search path, followed by its parents.
sys.path[:0] = ['../evaluation/queries/', '../evaluation/', '..']

from evaluation import utils, config
import queries2 # need to fix path to use original queries
#import dask_geopandas
import duckdb as ddb
#import spatialpandas as sp
#import hvplot.pandas # noqa


In [None]:
%%time

from evaluation import utils, config
import queries2 # need to fix path to use original queries
import dask_geopandas

def get_all_catchment_metrics():
    """Return catchment metrics joined to their HUC10 basin geometries.

    The basin geometries are loaded with ``utils.parquet_to_gdf`` from
    ``config.HUC10_PARQUET_FILEPATH``. The metrics come from the SQL text
    produced by ``queries2.calculate_catchment_metrics`` for the medium-range
    forcing and analysis/assimilation parquet sources, executed with DuckDB.

    Returns:
        The basins frame merged with the metrics frame on
        ``huc10 == catchment_id`` (default merge, so only matching rows are
        kept).
    """
    huc10_basins = utils.parquet_to_gdf(config.HUC10_PARQUET_FILEPATH)

    # Placeholder predicate; presumably rendered as "1 = 1" so that every row
    # is kept -- confirm against queries2. Narrower filters (for example a
    # "like" match on catchment_id for one HUC2 prefix, or an "=" match on a
    # single reference_time) can be added to the filters list below.
    keep_all_rows = {
        "column": "1",
        "operator": "=",
        "value": 1
    }

    metrics_sql = queries2.calculate_catchment_metrics(
        config.MEDIUM_RANGE_FORCING_PARQUET,
        config.FORCING_ANALYSIS_ASSIM_PARQUET,
        group_by=["reference_time, catchment_id"],
        order_by=["reference_time, catchment_id"],
        filters=[keep_all_rows],
    )
    metrics_df = ddb.query(metrics_sql).to_df()

    return huc10_basins.merge(
        metrics_df,
        left_on="huc10",
        right_on="catchment_id",
    )


# Partition the merged metrics frame into a dask-geopandas frame (20 partitions).
# TODO: read the parquet sources directly with dask instead of converting an
# in-memory GeoDataFrame.
dask_df = dask_geopandas.from_geopandas(
    get_all_catchment_metrics(),
    npartitions=20,
)
# Preview the first rows as the cell output.
dask_df.head(3)

In [None]:
%%time
# HUC2 region code: the first two characters of the HUC10 id, stored as an int
# so it can be compared against the integer region selector.
dask_df['huc2'] = dask_df['huc10'].str[:2]
dask_df['huc2'] = dask_df['huc2'].astype("int")

# Normalise the identifier / label / timestamp columns to explicit dtypes.
dask_df['huc10'] = dask_df['huc10'].astype("string")
dask_df['name'] = dask_df['name'].astype("string")
dask_df['reference_time'] = dask_df['reference_time'].astype("datetime64[ns]")
dask_df['catchment_id'] = dask_df['catchment_id'].astype("string")

# 'time' is reference_time as whole epoch seconds: the datetime is cast to its
# integer nanosecond value and scaled down by 10**9.
dask_df['time'] = dask_df['reference_time'].astype(int)/ 10**9
dask_df['time'] = dask_df['time'].astype("int") # needs the extra pass to actually maintain int type

# NOTE: the frame is deliberately NOT reprojected here. The previous bare
# `dask_df.to_crs("EPSG:3395")` call discarded its result (to_crs returns a new
# frame), so it never had any effect. HydroExplorer.get_polygon reprojects each
# selection itself before plotting, while plot_table / plot_forecast_diff test
# lon/lat points against dask_df -- reprojecting the whole frame here would
# break those lookups (assumes the source geometries are lon/lat -- TODO confirm).

dask_df.dtypes

In [None]:
import holoviews as hv, geoviews as gv, param, dask.dataframe as dd, cartopy.crs as crs
import panel as pn
from datetime import datetime as dt
from bokeh.models import HoverTool
#import datetime as dt
import datashader as ds
from spatialpandas import GeoSeries, GeoDataFrame
from colormap import rgb2hex
import logging
from shapely.geometry import Point
import dask

from colorcet import cm
from holoviews.operation.datashader import rasterize, shade, regrid, inspect_points
from holoviews.operation.datashader import (
    datashade, inspect_polygons
)
from holoviews.streams import RangeXY, Pipe, Tap, Selection1D
from holoviews.util.transform import easting_northing_to_lon_lat

# Select the Bokeh backend for HoloViews/GeoViews and hide the logo banner.
hv.extension('bokeh', logo=False)

# Plot options shared by the map layers: fixed frame size, no grid lines.
# Axes and background colour are left at their defaults.
opts = {
    'width': 700,
    'height': 500,
    'show_grid': False,
}

# Candidate colormap names (keys into colorcet's ``cm``); not referenced by any
# active code in this cell.
cmaps = [
    'fire',
    'bgy',
    'bgyw',
    'bmy',
    'gray',
    'kbc',
]


class HydroExplorer(param.Parameterized):
    """Interactive explorer for the catchment metrics held in ``dask_df``.

    Reads the module-level ``dask_df`` frame (it must exist before this class
    is defined, because the selector options are computed in the class body).

    Parameters:
        huc2: integer HUC2 region code (1-18) to display.
        time: reference time, as epoch seconds, to display.

    A shared ``Tap`` stream records the last tapped map position; the table and
    the forecast/observed curves show the catchment found at that position.
    """
    renderer = hv.renderer('bokeh')

    # Evaluate the time bounds and the distinct region/time values with a
    # single dask.compute call when the class body is executed.
    _min_time, _max_time, huc_list, time_list = dask.compute(
        dask_df.time.min(),
        dask_df.time.max(),
        dask_df.huc2.unique(),
        dask_df.time.unique(),
    )

    huc2       = param.ObjectSelector(default=1, objects=list(range(1, 19)))
    # unique() gives no ordering guarantee; sort so that a player widget bound
    # to this parameter steps through the reference times chronologically.
    time       = param.ObjectSelector(default=int(_min_time), objects=sorted(time_list))
    _tap_stream = Tap(transient=False)

    @param.depends('huc2','time')
    def get_polygon(self):
        """Return the catchment polygons for the selected region and time.

        Returns:
            A ``gv.Polygons`` element with ``bias``, ``name`` and
            ``catchment_id`` as value dimensions, coloured by ``bias``.
        """
        selection = dask_df[(dask_df['huc2']==self.huc2) & (dask_df['time']==self.time)]
        # The element below is declared as crs.GOOGLE_MERCATOR, i.e. spherical
        # Web Mercator (EPSG:3857), so the geometries must be projected to that
        # same CRS. EPSG:3395 (ellipsoidal World Mercator), used previously,
        # has different northings and shifted the polygons relative to the
        # tiles and to the tapped coordinates.
        selection = selection.to_crs("EPSG:3857")
        selection['name'] = selection['name'].astype('category')
        polygon = gv.Polygons(GeoDataFrame(selection.compute()), #hover functionality needs spatialpandas dataframe to work
                            crs=crs.GOOGLE_MERCATOR,
                            vdims=['bias', 'name', 'catchment_id'])

        polygon.opts(color_index='bias',colorbar=True, tools=['tap'])
        return polygon


    @param.depends('huc2','time')
    def map_plot2(self):
        """Return the map overlay: base tiles, datashaded polygons and hover layer.

        Side effect: the shared tap stream is re-pointed at the freshly built
        shaded layer so that taps on the new plot are reported.
        """
        polygon = hv.DynamicMap(self.get_polygon)
        # Rasterise the polygons, aggregating each pixel to the minimum bias.
        shaded = datashade(polygon, aggregator=ds.min('bias'))
        shaded.opts(tools=['tap'], alpha=0.75)

        tiles = gv.tile_sources.StamenTerrain().apply.opts(alpha=0.75, **opts)

        tooltips=[('Name', '@name'), ('Catchment ID', '@catchment_id')]
        hover_tool = HoverTool(tooltips=tooltips)
        hover = inspect_polygons(shaded).opts(fill_color='yellow', tools=[hover_tool]).opts(alpha=0.9)
        self._tap_stream.source = shaded

        return (tiles * shaded * hover).opts(framewise=True)

    def get_table_dmap(self):
        """Return a DynamicMap that redraws ``plot_table`` on every tap."""
        return hv.DynamicMap(self.plot_table, streams=[self._tap_stream])

    def _rows_at_tap(self, x, y):
        """Return the (lazy) rows of ``dask_df`` whose geometry contains the tap.

        Args:
            x, y: tapped easting/northing, or ``None`` before the first tap.

        Before the first tap the position falls back to easting/northing (0, 0).
        The position is converted to lon/lat before the containment test
        (assumes dask_df geometries are stored in lon/lat -- TODO confirm).
        """
        if x is None or y is None:
            x, y = 0, 0
        lon, lat = easting_northing_to_lon_lat(x, y)
        return dask_df[dask_df.contains(Point(lon, lat))]

    def plot_table(self,x,y):
        """Return an ``hv.Table`` of the metric columns for the tapped catchment.

        Args:
            x, y: tapped easting/northing supplied by the tap stream.
        """
        target_fields = ['huc10', 'name', 'reference_time', 'catchment_id',
                         'intercept', 'covariance', 'corr', 'r_squared',
                         'forecast_count', 'observed_count', 'forecast_average', 'observed_average',
                         'forecast_variance', 'observed_variance', 'max_forecast_delta', 'bias']
        return hv.Table(self._rows_at_tap(x, y)[target_fields].compute())

    @pn.depends(_tap_stream.param.x,_tap_stream.param.y)
    def plot_forecast_diff(self,x,y):
        """Return forecast vs. observed average curves for the tapped catchment.

        Args:
            x, y: tapped easting/northing supplied by the tap stream.

        Returns:
            An overlay of two curves over ``reference_time``, labelled with the
            catchment name (empty label when no catchment contains the tap).
        """
        target_fields = ['name', 'reference_time', 'forecast_average', 'observed_average']
        rslt = self._rows_at_tap(x, y)[target_fields].compute()

        forecast_avg = hv.Curve(rslt, 'reference_time', 'forecast_average', label='forecast_average').opts(tools=['hover'])
        observed_avg = hv.Curve(rslt, 'reference_time', 'observed_average', label='observed_average').opts(tools=['hover'])
        viz = forecast_avg * observed_avg
        viz.opts(width=1200)
        label = rslt['name'].iloc[0] if not rslt.empty else ""
        return viz.relabel(label)
    
hydro = HydroExplorer(name="data explorer")

# Top row: the map next to the parameter widgets, with 'time' rendered as a
# discrete player.
explorer_controls = pn.Param(
    hydro.param,
    widgets={'time': pn.widgets.DiscretePlayer},
)
map_row = pn.Row(hydro.map_plot2, explorer_controls, sizing_mode="stretch_both")

# Below the map: the forecast/observed curves and the table for the tapped catchment.
tapped_table = hydro.get_table_dmap().opts(width=1200)

pn.Column(map_row, hydro.plot_forecast_diff, tapped_table).servable()