In [None]:
%%capture
!pip install spatialpandas easydev colormap colorcet duckdb dask_geopandas nb_black

In [None]:
# Render matplotlib figures inline, directly beneath the cell that creates them.
%matplotlib inline

# Add the parent directory to the import path so project code can be
# referenced from the notebook.
import sys

PROJECT_ROOT = '..'
# Only prepend when it is not already the first entry, so re-running this cell
# does not keep stacking duplicate entries onto sys.path.
if sys.path[:1] != [PROJECT_ROOT]:
    sys.path.insert(0, PROJECT_ROOT)

In [None]:
import duckdb
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import config
import utils

In [None]:
# load usgs gages
%time
gdf = utils.get_usgs_gages()

In [None]:
# Show the loaded gages table as this cell's output.
gdf

In [None]:
# Quick-look plot of the gages table, drawn with small markers on a wide figure.
gdf.plot(figsize=(20, 10), markersize=2)

In [None]:
# Query some forecast data from parquet files
import importlib
import queries
# Reload the module so that edits made to it are picked up without having to
# restart the kernel.
importlib.reload(queries)

In [None]:
# Build the per-feature metrics query for a single forecast reference time,
# run it through duckdb and show the resulting frame.
reference_time_filter = {
    "column": "reference_time",
    "operator": "=",
    "value": "2023-01-03 12:00:00",
}
query = queries.calculate_nwm_feature_metrics(
    config.MEDIUM_RANGE_1_PARQUET,
    config.USGS_PARQUET,
    group_by=["nwm_feature_id"],
    order_by=["observed_average"],
    filters=[reference_time_filter],
)
df = duckdb.query(query).to_df()
df

In [None]:
# Attach the query results to the gage records through the shared
# "nwm_feature_id" key.
gdf_map = gdf.merge(df, on="nwm_feature_id")

In [None]:
# Filter to CA: keep rows whose catchment_id begins with "18", then drop rows
# that do not have a positive observed average.
starts_with_18 = gdf_map["catchment_id"].str.startswith("18")
gdf_map = gdf_map.loc[starts_with_18]
has_positive_observed = gdf_map["observed_average"] > 0
gdf_map = gdf_map.loc[has_positive_observed]

gdf_map.plot("max_forecast_delta", figsize=(20, 10), markersize=1.5, legend=True)

In [None]:
# Fetch the joined timeseries for one reference time and one NWM feature.
timeseries_filters = [
    {"column": "reference_time", "operator": "=", "value": "2023-01-03 12:00:00"},
    {"column": "nwm_feature_id", "operator": "=", "value": "19266232"},
]
query = queries.get_joined_nwm_feature_timeseries(
    config.MEDIUM_RANGE_1_PARQUET,
    config.USGS_PARQUET,
    filters=timeseries_filters,
)

df = duckdb.query(query).to_df()
df

In [None]:
import holoviews as hv
from holoviews import opts
# Activate the bokeh plotting backend for HoloViews.
hv.extension('bokeh')

In [None]:
# Sort into a new frame instead of in place, so the `df` displayed by the
# query cell above is not modified after the reader has seen it.
df_sorted = df.sort_values("value_time")
# Label each curve so the overlay legend tells the two series apart.
forecast_curve = hv.Curve(df_sorted, "value_time", "forecast_value", label="Forecast")
observed_curve = hv.Curve(df_sorted, "value_time", "observed_value", label="Observed")
curves = forecast_curve * observed_curve
overlay = hv.Overlay(curves).opts(width=600)
overlay

In [None]:
def get_timeseries(reference_time: str, nwm_feature_id: str):
    """Fetch the joined timeseries for one reference time and one NWM feature.

    Args:
        reference_time: Value the "reference_time" column must equal.
        nwm_feature_id: Value the "nwm_feature_id" column must equal; it is
            converted with ``str()`` before being placed in the filter.

    Returns:
        The query result as a DataFrame sorted by "value_time".
    """
    wanted = {
        "reference_time": reference_time,
        "nwm_feature_id": str(nwm_feature_id),
    }
    # One equality filter per column, in the order listed above.
    equality_filters = [
        {"column": column, "operator": "=", "value": value}
        for column, value in wanted.items()
    ]
    sql = queries.get_joined_nwm_feature_timeseries(
        config.MEDIUM_RANGE_1_PARQUET,
        config.USGS_PARQUET,
        filters=equality_filters,
    )
    return duckdb.query(sql).to_df().sort_values("value_time")

In [None]:
# Collect the distinct reference times, in ascending order, as formatted
# strings for the drop down.
query = f"""
    SELECT 
    distinct(reference_time)
    FROM read_parquet('{config.MEDIUM_RANGE_1_PARQUET}/*.parquet')
;"""
reference_time_frame = duckdb.query(query).to_df().sort_values("reference_time")
reference_times = [
    pd.to_datetime(stamp).strftime("%Y-%m-%d %H:%M:%S")
    for stamp in reference_time_frame["reference_time"].to_list()
]

In [None]:
# Collect the distinct nwm_feature_ids for the drop down.
query = f"""
    SELECT 
    distinct(nwm_feature_id)
    FROM read_parquet('{config.MEDIUM_RANGE_1_PARQUET}/*.parquet')
;"""
nwm_feature_ids = duckdb.query(query).to_df()["nwm_feature_id"].to_list()

In [None]:
def load_timeseries(reference_time: str, nwm_feature_id: str, **kwargs):
    """Build the forecast-vs-observed plot for one reference time and feature.

    Args:
        reference_time: Passed through to ``get_timeseries``.
        nwm_feature_id: Passed through to ``get_timeseries``.
        **kwargs: Accepted but not used.

    Returns:
        The ``*`` product of two ``hv.Curve`` objects labelled "Forecast" and
        "Observed", both plotted against "value_time" with ``framewise=True``.
    """
    timeseries = get_timeseries(reference_time, nwm_feature_id)
    forecast = hv.Curve(timeseries, "value_time", "forecast_value", label="Forecast")
    observed = hv.Curve(timeseries, "value_time", "observed_value", label="Observed")
    return forecast.opts(framewise=True) * observed.opts(framewise=True)

In [None]:
import panel as pn
pn.extension()

# The reference time is stepped through with a player widget and the feature
# is picked from a drop-down. Only one widget is created per name: a widget
# assigned to `reference_time` and then immediately reassigned would never be
# shown or bound.
reference_time = pn.widgets.DiscretePlayer(
    name='Reference Time',
    options=reference_times,
    value=reference_times[0],
    loop_policy='loop',
)
nwm_feature_id = pn.widgets.Select(name='NWM Feature ID', options=nwm_feature_ids)

# Bind the plotting callback to both widgets so the DynamicMap is redrawn from
# their current values.
dmap = hv.DynamicMap(pn.bind(load_timeseries, reference_time=reference_time, nwm_feature_id=nwm_feature_id))

app = pn.Column(
    pn.Row(pn.WidgetBox('## Timeseries Explorer', nwm_feature_id, reference_time), dmap.opts(width=500, framewise=True)),
).servable()

app
