In [16]:
import hvplot.pandas
import numpy as np
import pandas as pd
import panel as pn
import geopandas as gpd
from trino.dbapi import connect
import folium
import param
import os

import holoviews as hv
import geoviews as gv
import cartopy.crs as ccrs
from holoviews import opts

from teehr.querying.utils import df_to_gdf

In [17]:
# Trino connection configuration.
# Every setting can be overridden with an environment variable of the same name;
# the second argument is the fallback used when the variable is unset.
TRINO_HOST = os.environ.get("TRINO_HOST", "localhost")
# Environment values are always strings, so cast here to keep the port an int
# regardless of whether it came from the environment or from the default.
TRINO_PORT = int(os.environ.get("TRINO_PORT", "8080"))
TRINO_USER = os.environ.get("TRINO_USER", "teehr")
TRINO_CATALOG = os.environ.get("TRINO_CATALOG", "iceberg")
TRINO_SCHEMA = os.environ.get("TRINO_SCHEMA", "teehr")

In [18]:
def get_trino_connection():
    """Open and return a new Trino DB-API connection built from the TRINO_* settings."""
    connection_settings = {
        "host": TRINO_HOST,
        "port": TRINO_PORT,
        "user": TRINO_USER,
        "catalog": TRINO_CATALOG,
        "schema": TRINO_SCHEMA,
        "http_scheme": 'http',
        # For production, add authentication:
        # "auth": BasicAuthentication("username", "password"),
    }
    return connect(**connection_settings)

In [19]:
def get_primary_timeseries(location_id: str):
    """Fetch the primary timeseries rows for one location as a DataFrame.

    Parameters
    ----------
    location_id : str
        Value matched against the ``location_id`` column of
        ``iceberg.teehr.primary_timeseries``.

    Returns
    -------
    pandas.DataFrame
        Every column of the matching rows, as produced by ``pd.read_sql``.
    """
    # The id is bound as a query parameter (Trino's DB-API uses "?" placeholders)
    # rather than formatted into the SQL text, so quotes in the value cannot
    # change the statement.
    sql = "SELECT * FROM iceberg.teehr.primary_timeseries WHERE location_id = ?"
    conn = get_trino_connection()
    try:
        return pd.read_sql(sql, conn, params=[location_id])
    finally:
        # Release the connection even when the query raises.
        conn.close()

In [20]:
def get_secondary_timeseries(location_id: str, configuration_names: list = ["nwm30_short_range"]):
    """Fetch secondary timeseries rows crosswalked to a primary location.

    Rows are selected from ``iceberg.teehr.secondary_timeseries`` by joining
    ``location_crosswalks`` on the secondary location id and filtering on the
    given primary location id and configuration names.

    Parameters
    ----------
    location_id : str
        Primary location id matched against ``lc.primary_location_id``.
    configuration_names : list of str, optional
        Configuration names to keep. A single string is treated as a
        one-element list. The default list is never mutated.

    Returns
    -------
    pandas.DataFrame
        The ``st.*`` columns of the matching rows, as produced by ``pd.read_sql``.

    Raises
    ------
    ValueError
        If ``configuration_names`` is empty (an empty ``IN ()`` is invalid SQL).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(configuration_names, str):
        configuration_names = [configuration_names]
    names = list(configuration_names)
    if not names:
        raise ValueError("configuration_names must contain at least one name")

    # One "?" per name; all values are bound as parameters instead of being
    # formatted into the SQL text.
    placeholders = ", ".join("?" for _ in names)
    sql = f"""
        SELECT st.*
        FROM iceberg.teehr.secondary_timeseries st
        JOIN location_crosswalks lc
        ON st.location_id = lc.secondary_location_id
        WHERE lc.primary_location_id = ?
        AND st.configuration_name IN ({placeholders})
    """
    conn = get_trino_connection()
    try:
        return pd.read_sql(sql, conn, params=[location_id, *names])
    finally:
        # Release the connection even when the query raises.
        conn.close()

In [21]:
# Primary location id used by the cells below to query and title the plots.
location_id = "usgs-05443500"

In [24]:
# Load the primary series for the location and the secondary series crosswalked to it.
primary_df = get_primary_timeseries(location_id)
secondary_df = get_secondary_timeseries(location_id)

# Time window spanned by the secondary data; both plots are restricted to it.
secondary_times = secondary_df['value_time']
min_time = secondary_times.min()
max_time = secondary_times.max()

primary_in_window = primary_df[primary_df['value_time'].between(min_time, max_time)]
secondary_in_window = secondary_df[secondary_times.between(min_time, max_time)]

# Primary data: one black line per configuration.
primary_plot = primary_in_window.hvplot.line(
    x='value_time', y='value', by='configuration_name', legend=False, color='black'
)

# Secondary data: one line per (configuration, reference_time) pair.
secondary_plot = secondary_in_window.hvplot.line(
    x='value_time', y='value', by=['configuration_name', 'reference_time'], legend=False
)

# Last expression of the cell, so the titled overlay is what gets displayed.
timeseries_overlay = primary_plot * secondary_plot
timeseries_overlay.opts(title=f"Observed and Simulated Timeseries at {location_id}")

  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)


In [31]:
# Mean/min/max of `value` over all secondary rows that share the same
# time, location, unit, configuration and variable.
stats_group_keys = ["value_time", "location_id", "unit_name", "configuration_name", "variable_name"]
stats_df = secondary_df.groupby(stats_group_keys)["value"].agg(['mean', 'min', 'max'])

In [35]:
# Preview the first rows of the grouped mean/min/max statistics.
stats_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,mean,min,max
value_time,location_id,unit_name,configuration_name,variable_name,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-10-27 01:00:00+00:00,nwm30-10607692,m^3/s,nwm30_short_range,streamflow_hourly_inst,83.65,83.65,83.65
2025-10-27 02:00:00+00:00,nwm30-10607692,m^3/s,nwm30_short_range,streamflow_hourly_inst,83.289997,82.39,84.189995
2025-10-27 03:00:00+00:00,nwm30-10607692,m^3/s,nwm30_short_range,streamflow_hourly_inst,84.0,82.78,84.89
2025-10-27 04:00:00+00:00,nwm30-10607692,m^3/s,nwm30_short_range,streamflow_hourly_inst,85.322499,83.28,87.659996
2025-10-27 05:00:00+00:00,nwm30-10607692,m^3/s,nwm30_short_range,streamflow_hourly_inst,86.727999,83.909996,90.22


In [33]:
# Attach the mean/min/max columns to every secondary row by matching the row's
# key columns against the index levels of stats_df.
stats_join_keys = ["value_time", "location_id", "unit_name", "configuration_name", "variable_name"]
secondary_stats_df = secondary_df.join(stats_df, on=stats_join_keys, rsuffix='_stat')

In [45]:
# Filled band between the per-timestep min and max columns.
short_range_band_options = {
    "x": "value_time",
    "y": "min",
    "y2": "max",
    "color": 'lightblue',
    "alpha": 0.5,
    "legend": True,
    "label": "NWM Short Range",
    "grid": True,
}
shaded_short_range_plot = secondary_stats_df.hvplot.area(**short_range_band_options)

In [51]:
# Line through the per-timestep mean column.
mean_short_range_plot = secondary_stats_df.hvplot.line(
    y="mean",
    x="value_time",
    color='black',
    legend=True,
    label="NWM Short Range Mean",
    grid=True
)
# Band first, line second: overlay elements are drawn in order, so this keeps
# the mean line on top of the semi-transparent min/max band rather than under it.
shaded_short_range_plot * mean_short_range_plot

In [53]:
# Overlay the min/max band with the primary timeseries plot.
shaded_short_range_plot * primary_plot

In [54]:
# Overlay the individual secondary lines with the primary timeseries plot.
secondary_plot * primary_plot