# Run Queries Against Post-Event Dataset
In this notebook we run queries against the post-event datasets: CONUS HUC10 mean areal precipitation (MAP) spanning 2022-12-18 to 2023-01-18.

In [None]:
# Import the required packages.

# Need to install TEEHR to avoid this
import sys
sys.path.insert(0, "../../src")

import cartopy.crs as ccrs
import geoviews as gv
import holoviews as hv
import hvplot.pandas
from holoviews import opts

import teehr.queries.duckdb as tqd
import teehr.queries.pandas as tqp

## First - Mean Areal Precipitation (MAPs)

In [None]:
# Paths to the MAP (mean areal precipitation) study files.
# POST_EVENT_DIR is the single value to edit when the study data lives elsewhere.
POST_EVENT_DIR = "/home/jovyan/shared/rti-eval/post-event-example"

# Timeseries parquet globs: the analysis/assimilation forcing is used as the
# primary series and the medium-range forcing as the secondary series.
FORCING_ANALYSIS_ASSIM = f"{POST_EVENT_DIR}/timeseries/forcing_analysis_assim/*.parquet"
FORCING_MEDIUM_RANGE = f"{POST_EVENT_DIR}/timeseries/forcing_medium_range/*.parquet"

# HUC10 crosswalk and geometry files.
CATCHMENT_XWALK = f"{POST_EVENT_DIR}/geo/huc10_huc10_crosswalk.parquet"
CATCHMENT_GEOMETRY = f"{POST_EVENT_DIR}/geo/huc10_geometry.parquet"

In [None]:
%%time
# Query the specified study files with a simple group_by `primary_location_id`.
# Include geometry in the response.

query_gdf = tqd.get_metrics(
        primary_filepath=PRIMARY_FILEPATH,
        secondary_filepath=SECONDARY_FILEPATH,
        crosswalk_filepath=CROSSWALK_FILEPATH,
        # geometry_filepath=GEOMETRY_FILEPATH,
        group_by=["primary_location_id", "reference_time"],
        order_by=["primary_location_id", "reference_time"],
        filters=[
            {
                "column": "primary_location_id",
                "operator": "like",
                "value": "huc10-1810020416"
            },
            {
                "column": "reference_time",
                "operator": "=",
                "value": "2022-12-18 12:00:00"
            },
        ],
        return_query=False,
        include_geometry=False,
)

In [None]:
# Display the metrics returned by the single-location, single-reference-time query.
query_gdf 

In [None]:
%%time
# Query the specified study files with a simple group_by `primary_location_id`.
# Include geometry in the response.

query_gdf = tqd.get_metrics(
        primary_filepath=PRIMARY_FILEPATH,
        secondary_filepath=SECONDARY_FILEPATH,
        crosswalk_filepath=CROSSWALK_FILEPATH,
        geometry_filepath=GEOMETRY_FILEPATH,
        group_by=["primary_location_id"],
        order_by=["primary_location_id"],
        filters=[{
            "column": "primary_location_id",
            "operator": "like",
            "value": "huc10-18%"
        }],
        return_query=False,
        include_geometry=True,
)

In [None]:
# Display the per-location metrics (with geometry) for the `huc10-18%` locations.
query_gdf 

In [None]:
%%time
# Query the specified study files with a simple group_by `primary_location_id`.
# Include geometry in the response.

query_pd_gdf = tqp.get_metrics(
        primary_filepath=FORCING_ANALYSIS_ASSIM,
        secondary_filepath=FORCING_MEDIUM_RANGE,
        crosswalk_filepath=CATCHMENT_XWALK,
        geometry_filepath=CATCHMENT_GEOMETRY,
        group_by=["primary_location_id"],
        order_by=["primary_location_id"],
        include_metrics=["nash_sutcliffe_efficiency"],
        filters=[{
            "column": "primary_location_id",
            "operator": "like",
            "value": "huc10-18%"
        }],
        return_query=False,
        include_geometry=True,
)

In [None]:
# Display the result of the `tqp.get_metrics` query.
query_pd_gdf

In [None]:
%%time
query_gdf_prj = query_gdf.to_crs("EPSG:3857")

In [None]:
%%time
tiles = gv.tile_sources.OSM
query_hvplot = query_gdf_prj.hvplot(
    color="red",
    crs=ccrs.GOOGLE_MERCATOR,
    hover_cols=["nash_sutcliffe_efficiency"]
)
(tiles * query_hvplot).opts(width=600, height=600, show_legend=False)

In [None]:
%%time
query_df = tqd.get_joined_timeseries(
        primary_filepath=FORCING_ANALYSIS_ASSIM,
        secondary_filepath=FORCING_MEDIUM_RANGE,
        crosswalk_filepath=CATCHMENT_XWALK,
        geometry_filepath=CATCHMENT_GEOMETRY,
        order_by=["primary_location_id"],
        filters=[{
            "column": "primary_location_id",
            "operator": "=",
            "value": "huc10-1801010101"

        }],
        return_query=False,
)

In [None]:
# Display the joined timeseries returned for `huc10-1801010101`.
query_df

In [None]:
# Plot the primary value against its valid time for the selected HUC10.
query_df.hvplot(x="value_time", y="primary_value")

In [None]:
## Second - Point Data

In [None]:
# Paths to the point-data study files.
# POST_EVENT_DIR is the single value to edit when the study data lives elsewhere.
POST_EVENT_DIR = "/home/jovyan/shared/rti-eval/post-event-example"

# Timeseries parquet globs: USGS is used as the primary series and
# medium-range member 1 as the secondary series.
USGS = f"{POST_EVENT_DIR}/timeseries/usgs/*.parquet"
MEDIUM_RANGE_MEM1 = f"{POST_EVENT_DIR}/timeseries/medium_range_mem1/*.parquet"

# Gage crosswalk and USGS geometry files.
GAGE_XWALK = f"{POST_EVENT_DIR}/geo/usgs_nwm22_crosswalk.parquet"
USGS_GEOMETRY = f"{POST_EVENT_DIR}/geo/usgs_geometry.parquet"

In [None]:
%%time
# Query the specified study files with a simple group_by `primary_location_id`.
# Include geometry in the response.

query_gdf = tqd.get_metrics(
        primary_filepath=USGS,
        secondary_filepath=MEDIUM_RANGE_MEM1,
        crosswalk_filepath=GAGE_XWALK,
        geometry_filepath=USGS_GEOMETRY,
        group_by=["primary_location_id"],
        order_by=["primary_location_id"],
        include_metrics=["bias"],
        filters=[{
            "column": "primary_value",
            "operator": ">",
            "value": "-999"
        }],
        return_query=False,
        include_geometry=True,
)

In [None]:
# Display the per-location bias (with geometry) for the point data.
query_gdf

In [None]:
%%time
tiles = gv.tile_sources.OSM
query_hvplot = query_gdf.to_crs("EPSG:3857").hvplot(
    crs=ccrs.GOOGLE_MERCATOR,
    c="bias"
)
(tiles * query_hvplot).opts(width=1200, height=600, show_legend=False)