## Exploring the TEEHR Evaluation

---

In [None]:
from pathlib import Path
import os

import teehr
from utils import teehr_ngiab
from teehr.evaluation.utils import print_tree

# Configure Bokeh to render its plots inline in this notebook.
# NOTE(review): presumably needed by the plots produced through the `.teehr`
# accessor later on (e.g. locations_map()) — confirm against the TEEHR docs.
from bokeh.io import output_notebook
output_notebook()

In [None]:
# Resolve the NGIAB output directory from the environment. Fail early with a
# clear message when the variable is unset or empty: Path(None) would
# otherwise raise an opaque TypeError.
ngiab_output_dir = os.environ.get("NGIAB_OUTPUT_DIR")
if not ngiab_output_dir:
    raise RuntimeError(
        "The NGIAB_OUTPUT_DIR environment variable is not set; "
        "it must point to the NGIAB output directory."
    )
MOUNTED_DATA_DIR = Path(ngiab_output_dir)

# Configuration name derived from the final component of the output path.
configuration_name = teehr_ngiab.sanitize_string(MOUNTED_DATA_DIR.name)
print(f"NGIAB output directory: {MOUNTED_DATA_DIR}")

#### Initialize the Evaluation object

In [None]:
# Path of the TEEHR evaluation directory used throughout this notebook.
TEEHR_EVALUATION_DIR = Path("/app/data/teehr")

# Open the evaluation that lives at that path.
ev = teehr.Evaluation(
    dir_path=TEEHR_EVALUATION_DIR,
)

In [None]:
# Print the evaluation directory's tree (called with max_depth=2).
print_tree(TEEHR_EVALUATION_DIR, max_depth=2)

#### Location Data Tables

- `locations` - spatial data (i.e., gage locations)
- `location_crosswalks` - crosswalk table relating primary vs. secondary IDs
- `location_attributes` - attribute data per location

In [None]:
# Fetch the locations table, convert it with to_geopandas(), and display it.
locations_table = ev.locations
locations_gdf = locations_table.to_geopandas()
locations_gdf

In [None]:
# Draw the locations map through the `.teehr` accessor on the locations frame.
teehr_accessor = locations_gdf.teehr
teehr_accessor.locations_map()

In [None]:
# Preview the first rows of the crosswalk table.
crosswalks_df = ev.location_crosswalks.to_pandas()
crosswalks_df.head()

#### Domain Tables

"Lookup tables" that help keep data consistent.
- `units`
- `variables`
- `configurations`
- `attributes`

In [None]:
# Example: display the `units` domain table as a pandas DataFrame.
units_table = ev.units
units_table.to_pandas()

#### Timeseries Tables
- `primary_timeseries` - "observations"
- `secondary_timeseries` - "simulations"
- `joined_timeseries` - paired timeseries

In [None]:
# Example: convert the primary timeseries with to_sdf() and print a preview.
primary_sdf = ev.primary_timeseries.to_sdf()
primary_sdf.show()

#### Schema

See the TEEHR schema documentation: https://rtiinternational.github.io/teehr/user_guide/notebooks/01_introduction_schema.html

#### Table Methods

##### List table fields

In [None]:
# List the fields available on the joined timeseries table.
joined_table = ev.joined_timeseries
joined_table.fields()

##### Show distinct values in a given field

In [None]:
# Show the distinct values of one field of the joined timeseries table.
field_to_inspect = "configuration_name"
ev.joined_timeseries.distinct_values(field_to_inspect)

##### Query a table using filters

In [None]:
# Filter the joined timeseries to a single configuration at a single location
# and preview the first rows.
# The configuration filter uses `configuration_name` (derived earlier from the
# NGIAB output directory name) instead of a hard-coded value.
# NOTE(review): assumes the evaluation was built with that configuration
# name — confirm with distinct_values("configuration_name").
# NOTE(review): the location ID below is specific to one dataset; replace it
# with an ID present in your `locations` table if the result is empty.
ev.joined_timeseries.query(
    filters=[
        f"configuration_name = '{configuration_name}'",
        "primary_location_id = 'usgs-02423130'"
    ]
).to_pandas().head()

#### Calculating Performance Metrics

##### Metrics in TEEHR are organized into 3 categories:
- Deterministic (KGE, bias, rmse...)
- Signature (average, count, max...)
- Probabilistic (CRPS)

In [None]:
# Metrics to compute for every (location, configuration) group.
selected_metrics = [
    teehr.DeterministicMetrics.KlingGuptaEfficiency(),
    teehr.SignatureMetrics.Average(),
    teehr.SignatureMetrics.MaxValueTime(),
    teehr.DeterministicMetrics.MeanAbsoluteError(),
]

# Run the metrics query, ordered by location, and convert the result with
# to_geopandas() before displaying it.
metrics_query = ev.metrics.query(
    group_by=["primary_location_id", "configuration_name"],
    order_by=["primary_location_id"],
    include_metrics=selected_metrics,
)
metrics_gdf = metrics_query.to_geopandas()
metrics_gdf