In [None]:
%matplotlib inline

# Make the project's evaluation code importable from this notebook.
# Both directories are placed at the front of sys.path, with the queries
# directory taking precedence over its parent evaluation directory.
import sys
sys.path[:0] = ['../../evaluation/queries', '../../evaluation']

In [None]:
%%capture
!pip install duckdb
!pip install spatialpandas

In [None]:
import duckdb
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import config
import utils

# Query some forecast data from parquet files
import importlib
import queries
# Reload the queries module so changes made to it after the first import are
# picked up without restarting the kernel.
importlib.reload(queries)

In [None]:
import dask.dataframe as dd
import colorcet as cc
import datashader as ds
import datashader.transfer_functions as tf
import spatialpandas as sp
import spatialpandas.geometry
import spatialpandas.dask 

from dask.distributed import Client, LocalCluster

import holoviews as hv
from holoviews.operation.datashader import rasterize
from holoviews.streams import PlotSize
# A plot-size scale of 2 gives sharper plots on Retina displays.
PlotSize.scale = 2

# Activate the Bokeh plotting backend for HoloViews.
hv.extension("bokeh")


In [None]:
# Start a local Dask cluster (8 workers x 2 threads each) and attach a client
# to it so later Dask computations in this notebook run on the cluster.
cluster = LocalCluster(
    n_workers=8,
    threads_per_worker=2,
)
client = Client(cluster)

# Bare last expression: show the cluster summary as the cell output.
cluster

In [None]:
%%time
# Load the HUC10 basin layer from the parquet file configured in `config`,
# using the project's `utils` helper.
# NOTE(review): presumably returns a geopandas GeoDataFrame with a "huc10"
# column (it is merged on that key below) — confirm in utils.parquet_to_gdf.
basins_gdf = utils.parquet_to_gdf(config.HUC10_PARQUET_FILEPATH)

In [None]:
%%time
query = queries.calculate_catchment_metrics(
    config.MEDIUM_RANGE_FORCING_PARQUET,
    config.FORCING_ANALYSIS_ASSIM_PARQUET,
    group_by=["catchment_id"],
    order_by=["observed_average"],
    filters=[
        # {
        #     "column": "catchment_id",
        #     "operator": "like",
        #     "value": "18%"
        # },
        {
            "column": "reference_time",
            "operator": "=",
            "value": "2023-01-03 12:00:00"
        },
    ]
)
df = duckdb.query(query).to_df()
# df

In [None]:
%%time
# Join query to basins
gdf_map = basins_gdf.merge(df, left_on="huc10", right_on="catchment_id")

In [None]:
%%time
# Convert the merged frame to a spatialpandas GeoDataFrame; this is the object
# handed to datashader's polygon rendering (directly, and via Dask) below.
spdf_map = sp.GeoDataFrame(gdf_map)

In [None]:
%%time
# convert to dask dataframe
ddf = dd.from_pandas(spdf_map, npartitions=8).pack_partitions(npartitions=100).persist()

In [None]:
# Repartition the Dask frame to roughly 256 MiB per partition and persist it.
# This must target `ddf`: `df` is the pandas DataFrame returned by the DuckDB
# query and has no `repartition` method, so calling it there raises
# AttributeError.
# NOTE(review): confirm that repartitioning after pack_partitions keeps the
# spatial partition layout that datashader relies on for this frame.
ddf = ddf.repartition(partition_size="256MiB").persist()

In [None]:
%%time
cvs = ds.Canvas(plot_width=650, plot_height=400)
agg = cvs.polygons(ddf, geometry='geometry', agg=ds.mean('bias'))
tf.shade(agg, cmap=cc.bgy)

In [None]:
%%time
cvs = ds.Canvas(plot_width=650, plot_height=400)
agg = cvs.polygons(spdf_map, geometry='geometry', agg=ds.mean('bias'))
tf.shade(agg)

In [None]:
%%time
# make a dask dataframe
ddf = dd.from_pandas(spdf_map, npartitions=2).pack_partitions(npartitions=100).persist()

In [None]:
# Render the rebuilt Dask frame on the existing canvas (mean 'bias' per pixel)
# and shade it with colorcet's kg colormap.
tf.shade(
    cvs.polygons(
        ddf,
        geometry='geometry',
        agg=ds.mean('bias'),
    ),
    cmap=cc.kg,
)