In [1]:
import hvplot.pandas
import numpy as np
import pandas as pd
import panel as pn
import geopandas as gpd
from trino.dbapi import connect
import folium
import param

import holoviews as hv
import geoviews as gv
import cartopy.crs as ccrs
from holoviews import opts

from teehr.querying.utils import df_to_gdf

In [2]:
import warnings
warnings.filterwarnings("ignore", message="pandas only supports SQLAlchemy connectable")

In [None]:
# Trino connection settings used by get_trino_connection().
# Coordinator endpoint and user (swap the host for "localhost" if needed).
TRINO_HOST = "trino"
TRINO_PORT = 8080
TRINO_USER = "teehr"

# Default catalog/schema applied to every connection.
TRINO_CATALOG = "iceberg"
TRINO_SCHEMA = "teehr"

In [3]:
# Load the Tabulator extension and make the Material design with full-width
# sizing the default for Panel components.
pn.extension(
    "tabulator",
    design="material",
    sizing_mode="stretch_width",
)

In [None]:
class DashboardState(param.Parameterized):
    """Reactive state shared by the dashboard's widgets and callbacks.

    Parameters:
        locations_gdf: GeoDataFrame of locations (not read inside this class).
        metrics_gdf: GeoDataFrame of per-location metrics; assigning it
            refreshes ``configuration_names`` and ``metrics``.
        selected_index: Row position into ``metrics_gdf`` of the selected
            location; changing it refreshes ``location_id``.
        location_id: ``primary_location_id`` of the selected row, or "" when
            no valid row is selected.
        configuration_names: Unique ``configuration_name`` values of
            ``metrics_gdf``.
        metrics: Columns of ``metrics_gdf`` other than the identifying and
            geometry columns.
        timeseries_loading: Flag that callbacks set while timeseries load.
    """

    locations_gdf = param.ClassSelector(class_=gpd.GeoDataFrame, default=None)
    metrics_gdf = param.ClassSelector(class_=gpd.GeoDataFrame, default=None)
    selected_index = param.Integer(default=0)
    location_id = param.String(default="")
    configuration_names = param.List(default=[])
    metrics = param.List(default=[])
    timeseries_loading = param.Boolean(default=False)

    @param.depends('selected_index', watch=True)
    def update_location_id(self):
        """Set ``location_id`` from the ``metrics_gdf`` row at ``selected_index``.

        Falls back to an empty string when no metrics are loaded or the row
        cannot be read.
        """
        # Declared parameters always exist on the instance, so only None
        # needs to be checked.
        if self.metrics_gdf is None:
            self.location_id = ""
            print("Metrics GeoDataFrame not found. Setting location_id to empty string.")
            return

        try:
            row = self.metrics_gdf.iloc[self.selected_index]
            self.location_id = str(row['primary_location_id'])
        except Exception as e:
            # Best effort: an out-of-range index or a missing column must not
            # break the dashboard, so report it and clear the selection.
            self.location_id = ""
            print("Error occurred while setting location_id.  Setting to empty string.", e)

    @param.depends('metrics_gdf', watch=True)
    def update_metrics(self):
        """Refresh ``configuration_names`` and ``metrics`` from ``metrics_gdf``."""
        if self.metrics_gdf is None:
            self.configuration_names = []
            self.metrics = []
            return

        self.configuration_names = self.metrics_gdf['configuration_name'].unique().tolist()
        # Everything that is not an identifying or geometry column is treated
        # as a metric.
        self.metrics = self.metrics_gdf.columns.difference(
            ['geometry', 'primary_location_id', 'name', 'configuration_name']
        ).tolist()


state = DashboardState()

In [None]:

def get_trino_connection():
    """Open and return a new Trino connection built from the TRINO_* settings."""
    connection_kwargs = dict(
        host=TRINO_HOST,
        port=TRINO_PORT,
        user=TRINO_USER,
        catalog=TRINO_CATALOG,
        schema=TRINO_SCHEMA,
        http_scheme='http',
        # For production, add authentication:
        # auth=BasicAuthentication("username", "password")
    )
    return connect(**connection_kwargs)


In [None]:
# Selected location callback
def selection_callback(index):
    """Handle a selection event from the locations map.

    Args:
        index: List of selected point positions. It may be empty when the
            selection is cleared; only the first entry is used otherwise.

    Side effects:
        Sets ``state.timeseries_loading`` to True and updates
        ``state.selected_index`` when a different point is selected.
    """
    if not index:
        # Selection cleared: there is no first element to read.
        return
    if len(index) > 1:
        print("Multiple selection detected, using the first selected index.")

    new_index = index[0]
    if new_index == state.selected_index:
        # Assigning an unchanged value notifies no watcher, so nothing would
        # reload and the loading flag would never be cleared again.
        return

    state.timeseries_loading = True
    state.selected_index = new_index

In [7]:
def get_locations():
    """Fetch the locations whose id starts with ``usgs-`` as a GeoDataFrame.

    Returns:
        The result of ``df_to_gdf`` applied to the queried rows.
    """
    query = """
    SELECT * FROM iceberg.teehr.locations
    WHERE id LIKE 'usgs-%'
    """
    conn = get_trino_connection()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return df_to_gdf(df)

# Fetch the locations when this cell runs and keep them on the shared state.
locations_gdf = get_locations()
state.locations_gdf = locations_gdf

In [8]:
# @pn.cache
def get_metrics_by_location():
    """Fetch all rows of ``sim_metrics_by_location`` as a GeoDataFrame.

    Returns:
        The result of ``df_to_gdf`` applied to the queried rows.
    """
    sql = "SELECT * from iceberg.teehr.sim_metrics_by_location"
    conn = get_trino_connection()
    try:
        df = pd.read_sql(sql, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return df_to_gdf(df)

# Fetch the metrics when this cell runs. Assigning them to the state triggers
# DashboardState.update_metrics, which fills configuration_names and metrics.
metrics_gdf = get_metrics_by_location()
state.metrics_gdf = metrics_gdf


In [9]:
def get_primary_timeseries(location_id: str):
    """Fetch the primary timeseries rows for one location.

    Args:
        location_id: Value matched against ``primary_timeseries.location_id``.

    Returns:
        DataFrame with every column of the matching rows.
    """
    # The id is bound as a query parameter (qmark placeholder) rather than
    # interpolated into the SQL text, so quotes in it cannot alter the query.
    sql = "SELECT * FROM iceberg.teehr.primary_timeseries WHERE location_id = ?"
    conn = get_trino_connection()
    try:
        df = pd.read_sql(sql, conn, params=[location_id])
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return df

# get_primary_timeseries("usgs-01347000")

In [None]:
def get_secondary_timeseries(location_id: str):
    """Fetch the secondary timeseries rows linked to a primary location.

    Secondary rows are matched through ``location_crosswalks``: rows whose
    ``location_id`` equals a crosswalk ``secondary_location_id`` for the given
    primary location are returned.

    Args:
        location_id: Primary location id used to look up the crosswalk.

    Returns:
        DataFrame with every ``secondary_timeseries`` column of the matches.
    """
    # The id is bound as a query parameter (qmark placeholder) rather than
    # interpolated into the SQL text. ``location_crosswalks`` is unqualified
    # and so resolves through the connection's default catalog and schema.
    sql = """
        SELECT st.*
        FROM iceberg.teehr.secondary_timeseries st
        JOIN location_crosswalks lc
        ON st.location_id = lc.secondary_location_id
        WHERE lc.primary_location_id = ?
    """
    conn = get_trino_connection()
    try:
        df = pd.read_sql(sql, conn, params=[location_id])
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return df

# get_secondary_timeseries("usgs-01347000")

In [None]:
def get_timeseries_plot(location_id: str):
    """Build the overlaid primary/secondary timeseries plot for a location.

    Args:
        location_id: Primary location id to plot; an empty value means that
            no location is selected.

    Returns:
        The overlay of the available line plots, titled with the location id,
        or None when no location is selected or no rows were returned.

    Side effects:
        Always resets ``state.timeseries_loading`` to False, including on the
        early return and when a query raises, so the loading indicator cannot
        stay on.
    """
    try:
        if not location_id:
            return None

        frames = (
            get_primary_timeseries(location_id),
            get_secondary_timeseries(location_id),
        )
        # Skip sources without rows so an empty frame is never plotted.
        plots = [
            frame.hvplot.line(
                x='value_time',
                y='value',
                by='configuration_name',
            )
            for frame in frames
            if not frame.empty
        ]
        if not plots:
            return None

        overlay = plots[0]
        for extra in plots[1:]:
            overlay = overlay * extra

        return overlay.opts(
            title=f"Observed and Simulated Timeseries at {location_id}"
        )
    finally:
        state.timeseries_loading = False

# Timeseries plot bound to the location id held on the shared state.
bound_timeseries_plot = pn.bind(get_timeseries_plot, location_id=state.param.location_id)

In [None]:
def get_locations_map(metrics_gdf: gpd.GeoDataFrame = None, configuration_name: str = None, metric: str = None):
    """Build the tappable locations map for one configuration, coloured by a metric.

    Args:
        metrics_gdf: Per-location metrics containing a ``configuration_name``
            column and geometries.
        configuration_name: Configuration whose rows are displayed; falls back
            to the first entry of ``state.configuration_names`` when empty.
        metric: Column used to colour the points; falls back to the first
            entry of ``state.metrics`` when empty.

    Returns:
        OSM tiles overlaid with the location points. Tapping a point calls
        ``selection_callback`` with positions that refer to ``metrics_gdf``.
    """
    if not configuration_name:
        configuration_name = state.configuration_names[0]
    if not metric:
        metric = state.metrics[0]

    mask = (metrics_gdf['configuration_name'] == configuration_name).to_numpy()
    gdf = metrics_gdf[mask]
    # Position of every displayed row inside the unfiltered metrics_gdf. The
    # selection stream reports positions within the filtered points, while
    # DashboardState looks rows up in the full frame with ``iloc``.
    row_positions = np.flatnonzero(mask)

    tiles = gv.tile_sources.OSM
    gdf_prj = gdf.to_crs("EPSG:3857")

    points = gv.Points(
        gdf_prj,
        crs=ccrs.GOOGLE_MERCATOR,
        vdims=[metric]
    ).opts(
        tools=["tap", "hover"],
        color=metric,
        size=10,
        width=800,
        height=500,
        selection_alpha=1,
        nonselection_alpha=0.4,
        selection_color="red",
    )

    def _on_select(index):
        """Translate filtered positions to metrics_gdf positions and forward them."""
        if not index:
            # Selection cleared: nothing to forward.
            return
        selection_callback([int(row_positions[i]) for i in index])

    # Attach a selection stream and register the translating callback
    selection = hv.streams.Selection1D(source=points)
    selection.add_subscriber(_on_select)

    return (tiles * points)

In [None]:
def selected_location_metrics_table(location_id):
    """Build a table of the selected location's metrics, one column per configuration.

    Args:
        location_id: ``primary_location_id`` to show; empty means nothing is
            selected.

    Returns:
        A Markdown pane with a hint when nothing is selected or no rows match,
        otherwise a Tabulator holding the transposed metrics.
    """
    if not location_id:
        return pn.pane.Markdown("Select a location to view its metrics.")

    all_metrics = state.metrics_gdf
    rows = all_metrics[all_metrics['primary_location_id'] == location_id]
    if rows.empty:
        return pn.pane.Markdown(f"No metrics found for location ID: {location_id}")

    # Transposed for easier viewing: configurations become the columns and
    # the remaining fields become the rows.
    per_configuration = (
        rows
        .drop(columns=['geometry'])
        .set_index('configuration_name')
        .T
        .reset_index()
    )

    return pn.widgets.Tabulator(
        per_configuration,
        theme="bootstrap",
        pagination='remote',
        page_size=10,
        sizing_mode='stretch_width',
    )


In [None]:
def get_location_details_title(location_id: str, loading: bool = False):
    """Build the heading shown above the location details.

    Args:
        location_id: Selected location id; empty means nothing is selected.
        loading: When True and a location is selected, a loading spinner is
            placed under the heading.

    Returns:
        A Markdown heading, or a Column of heading plus spinner while loading.
    """
    if not location_id:
        return pn.pane.Markdown("# Location Details Pane")

    heading = pn.pane.Markdown(f"# Location Details for {location_id}")
    if loading:
        return pn.Column(
            heading,
            pn.indicators.LoadingSpinner(value=True, size=40, name='Loading...'),
        )
    return heading


# Heading bound to both the selected location id and the loading flag.
bound_location_details_title = pn.bind(
    get_location_details_title,
    loading=state.param.timeseries_loading,
    location_id=state.param.location_id,
)

In [None]:
# Selector widgets whose options come from the shared dashboard state.
# NOTE(review): locations_widget is not placed in the layout built in this
# notebook section; confirm whether it is still needed.
locations_widget = pn.widgets.Select(
    name="location",
    value=state.location_id,
    options=list(state.metrics_gdf['primary_location_id']),
)

# Metric used to colour the map; starts on the first available metric.
metrics_widget = pn.widgets.Select(
    name="metric",
    value=state.metrics[0],
    options=[*state.metrics],
)

# Configuration shown on the map; starts on the first available one.
configurations_widget = pn.widgets.Select(
    name="configuration",
    value=state.configuration_names[0],
    options=[*state.configuration_names],
)

# Map re-rendered when the metrics data, configuration or metric changes.
bound_locations_map = pn.bind(
    get_locations_map,
    metric=metrics_widget.param.value,
    configuration_name=configurations_widget.param.value,
    metrics_gdf=state.param.metrics_gdf,
)

# Paginated list of every metrics row, without the geometry column.
tabulator = pn.widgets.Tabulator(
    metrics_gdf.drop(columns=["geometry"]),
    page_size=10,
    pagination="remote",
    theme="bootstrap",
)

# Metrics table for the currently selected location.
bound_selected_location_metrics_table = pn.bind(
    selected_location_metrics_table, location_id=state.param.location_id
)

# Two tabs: the interactive map with its selectors, and the full metrics list.
top_pane = pn.Tabs(
    (
        "Location Metrics Map",
        pn.Column(
            pn.Row(configurations_widget, metrics_widget),
            pn.Row(bound_locations_map),
        ),
    ),
    ("Location Metrics List", tabulator),
)

# Sidebar showing the connection settings in use.
sidebar = pn.Column(
    pn.pane.Markdown(f"""
        ### 🔧 Database Connection Information
        ```
        Host: {TRINO_HOST}
        Port: {TRINO_PORT}
        User: {TRINO_USER}
        Catalog: {TRINO_CATALOG}
        Schema: {TRINO_SCHEMA}
        ```
    """),
    "---",
)

# Main column: tabs on top, then the details for the selected location.
main = pn.Column(
    top_pane,
    bound_location_details_title,
    bound_timeseries_plot,
    bound_selected_location_metrics_table,
)

# Assemble the template and mark it servable.
pn.template.MaterialTemplate(
    site="TEEHR",
    title="Simulation Dashboard",
    sidebar=[sidebar],
    main=[main],
).servable()