## TEEHR Runner Notebook

---

In [1]:
# Progress message so the import step is visible in the executed notebook's output.
print("Importing dependencies...")
# Standard library.
from pathlib import Path
import os

# Third-party.
import pandas as pd

# Helper module providing the functions this notebook calls
# (sanitize_string, create_teehr_evaluation, calculate_metrics).
# NOTE(review): presumably a project-local `utils` package next to this notebook — confirm.
from utils import teehr_ngiab

Importing dependencies...


#### Setting paths

In [2]:
print("Setting paths...")

# Root directory passed to the evaluation builder as the NGIAB output location.
NGIAB_OUTPUT_DIR = Path("/app/data")

# The TEEHR evaluation lives in a "teehr" sub-directory of the NGIAB output.
TEEHR_EVALUATION_DIR = NGIAB_OUTPUT_DIR / "teehr"

# Scratch area for intermediate files, nested inside the evaluation directory.
TEMP_DIR = TEEHR_EVALUATION_DIR / "cache"

# Intermediate parquet files kept in the scratch area.
CROSSWALK_TABLE = TEMP_DIR / "xwalk_table.parquet"
LOCATIONS = TEMP_DIR / "usgs_locations.parquet"

# Metrics file written by option 2 and read back for display.
METRICS_CSV_FILEPATH = TEEHR_EVALUATION_DIR / "metrics.csv"

Setting paths...


#### Display the specified NGIAB output directory

In [3]:
# The NGIAB_OUTPUT_DIR environment variable supplies the directory being evaluated.
# Fail early with an actionable message when it is missing or empty: Path(None)
# would raise an opaque TypeError and Path("") would silently become ".".
mounted_data_dir_env = os.environ.get("NGIAB_OUTPUT_DIR")
if not mounted_data_dir_env:
    raise RuntimeError(
        "The NGIAB_OUTPUT_DIR environment variable is not set; "
        "it must point to the NGIAB output directory to evaluate."
    )
MOUNTED_DATA_DIR = Path(mounted_data_dir_env)

# The configuration name is derived from the final path component of that directory,
# passed through teehr_ngiab.sanitize_string.
configuration_name = teehr_ngiab.sanitize_string(MOUNTED_DATA_DIR.name)
print(f"Evaluating NGIAB output in: {MOUNTED_DATA_DIR}")
print(f"configuration_name = '{configuration_name}'")

Evaluating NGIAB output in: /mnt/data/ciroh/teehr/devcon2025/ngiab_sample_output/cat-491334-partial
configuration_name = 'cat_491334_partial'


#### Collect the run options from runTeehr.sh

In [4]:
def parse_run_options(raw_options):
    """Convert a comma-separated option string into a list of ints.

    Parameters
    ----------
    raw_options : str or None
        Value of the RUN_OPTIONS environment variable, e.g. "2,3".

    Returns
    -------
    list of int
        The parsed option numbers. An unset or empty value yields an empty
        list, so later ``N in run_options`` checks never see ``None``.

    Raises
    ------
    ValueError
        If a non-empty token is not a valid integer.
    """
    if not raw_options:
        return []
    # Skip blank tokens so a trailing comma ("2,3,") does not crash int("").
    return [int(token) for token in raw_options.split(",") if token.strip()]


print("Collecting the run options...")
run_options = parse_run_options(os.environ.get("RUN_OPTIONS"))
if run_options:
    print(f"Run options: {run_options}")
else:
    print("No run options found in RUN_OPTIONS; no option cells will run.")

Collecting the run options...
Run options: [2, 3]


#### Option 1. Build the TEEHR Evaluation

In [5]:
# Option 1: build the TEEHR evaluation from the NGIAB output.
# The truthiness guard keeps this cell from raising TypeError when no run
# options were supplied (run_options is None in that case).
if run_options and 1 in run_options:
    print("Running option 1: Creating the TEEHR Evaluation")
    teehr_ngiab.create_teehr_evaluation(
        teehr_evaluation_dir=TEEHR_EVALUATION_DIR,
        ngiab_output_dir=NGIAB_OUTPUT_DIR,
        crosswalk_table_filepath=CROSSWALK_TABLE,
        locations_filepath=LOCATIONS,
        temp_dir=TEMP_DIR,
        configuration_name=configuration_name,
    )

#### Option 2. Calculate metrics

In [6]:
# Option 2: compute the performance metrics and show them.
# The truthiness guard keeps this cell from raising TypeError when no run
# options were supplied (run_options is None in that case).
if run_options and 2 in run_options:
    print("Running option 2: Calculating performance metrics")
    teehr_ngiab.calculate_metrics(
        teehr_evaluation_dir=TEEHR_EVALUATION_DIR,
        metrics_csv_filepath=METRICS_CSV_FILEPATH,
    )
    # Display the calculated metrics.
    # The first CSV column is the saved row index; reading it as the index
    # avoids a spurious "Unnamed: 0" column in the displayed table.
    metrics_df = pd.read_csv(METRICS_CSV_FILEPATH, index_col=0)
    display(metrics_df)

Running option 2: Calculating performance metrics


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


25/05/24 10:10:02 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


[Stage 0:>                                                          (0 + 1) / 1]

                                                                                

[Stage 1:>                                                          (0 + 2) / 2]



[Stage 3:>                                                          (0 + 1) / 1]

                                                                                

Unnamed: 0,primary_location_id,configuration_name,kling_gupta_efficiency,nash_sutcliffe_efficiency,relative_bias,root_mean_standard_deviation_ratio
0,usgs-02423130,cat_491334_partial,0.080198,-0.454538,-0.475969,1.206042
1,usgs-02423130,nwm30_retrospective,0.175289,0.395477,-0.555731,0.777511
2,usgs-02423380,cat_491334_partial,-0.517943,-2.972862,0.463203,1.993204
3,usgs-02423380,nwm30_retrospective,0.406938,0.569185,-0.407911,0.656365
4,usgs-02423397,cat_491334_partial,-0.501999,-0.092996,-0.865688,1.045464
5,usgs-02423397,nwm30_retrospective,0.528123,0.536092,-0.114646,0.681108
6,usgs-02423414,cat_491334_partial,0.022763,-1.512941,-0.434056,1.585226
7,usgs-02423414,nwm30_retrospective,0.111076,0.087543,-0.245032,0.955226
8,usgs-02423425,cat_491334_partial,-0.063768,-1.923545,0.234052,1.709838
9,usgs-02423425,nwm30_retrospective,0.705048,0.669517,-0.017121,0.574877
