In [1]:
import os
from pathlib import Path
import shutil

import teehr

# Echo the installed TEEHR version so the recorded output documents which
# release this notebook was executed against.
teehr.__version__

'0.6.0dev2'

In [2]:
from spark_session_utils_v06_dev import create_spark_session

In [4]:
import boto3

# Resolve AWS credentials through boto3's standard provider chain and export
# them as environment variables for the code that runs later in this notebook.
session = boto3.Session()
credentials = session.get_credentials()
if credentials is None:
    # get_credentials() returns None when no provider yields credentials;
    # fail here with a clear message instead of an AttributeError below.
    raise RuntimeError(
        "No AWS credentials found. Configure ~/.aws/credentials or set "
        "AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY before running this cell."
    )

# Take a single consistent snapshot so the key, secret and token always belong
# together, even when the underlying credentials are refreshable.
frozen_credentials = credentials.get_frozen_credentials()
aws_access_key_id = frozen_credentials.access_key
aws_secret_access_key = frozen_credentials.secret_key
aws_session_token = frozen_credentials.token

# os.environ only accepts strings, so an unset region (None) must be handled
# before assignment; fall back to a region already present in the environment.
aws_region = session.region_name or os.environ.get("AWS_REGION")
if not aws_region:
    raise RuntimeError(
        "No AWS region configured. Set AWS_REGION / AWS_DEFAULT_REGION or add "
        "a region to the active AWS profile before running this cell."
    )

os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
os.environ["AWS_REGION"] = aws_region

if aws_session_token:
    # Temporary (STS/SSO) credentials are only valid together with their token.
    os.environ["AWS_SESSION_TOKEN"] = aws_session_token
else:
    # Long-term keys: make sure a stale token from an earlier session is not
    # paired with the key and secret exported above.
    os.environ.pop("AWS_SESSION_TOKEN", None)

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [5]:
# Executor sizing passed to the project's Spark session helper; collected here
# so the values are easy to find and tune.
EXECUTOR_INSTANCES = 20
EXECUTOR_MEMORY = "16g"
EXECUTOR_CORES = 4

# NOTE(review): `start_spark_cluster=True` presumably makes the helper launch
# executors rather than run purely in local mode -- confirm against
# spark_session_utils_v06_dev.
spark = create_spark_session(
    start_spark_cluster=True,
    executor_instances=EXECUTOR_INSTANCES,
    executor_memory=EXECUTOR_MEMORY,
    executor_cores=EXECUTOR_CORES,
)

INFO:spark_session_utils_v06_dev:🚀 Creating Spark session: TEEHR Evaluation
INFO:spark_session_utils_v06_dev:✅ Spark local configuration successful!
INFO:spark_session_utils_v06_dev:🔑 Using AWS credentials from environment variables
INFO:spark_session_utils_v06_dev:   - Using long-term credentials
INFO:spark_session_utils_v06_dev:Configuring Iceberg catalogs...
INFO:spark_session_utils_v06_dev:⚙️ All settings applied. Creating Spark session...
:: loading settings :: url = jar:file:/srv/conda/envs/notebook/lib/python3.12/site-packages/pyspark/jars/ivy-2.5.3.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /home/jovyan/.ivy2.5.2/cache
The jars for the packages stored in: /home/jovyan/.ivy2.5.2/jars
org.apache.sedona#sedona-spark-shaded-4.0_2.13 added as a dependency
org.apache.iceberg#iceberg-spark-runtime-4.0_2.13 added as a dependency
org.datasyslab#geotools-wrapper added as a dependency
org.apache.iceberg#iceberg-spark-extensions-4.0_2.13 added a

In [6]:
%%time
dir_path = "/data/temp_warehouse"

ev = teehr.Evaluation(
    spark=spark,
    dir_path=dir_path,
    check_evaluation_version=False,
)

INFO:teehr.evaluation.evaluation:Creating directory /data/temp_warehouse_sjl.
INFO:teehr.evaluation.evaluation:Using provided Spark session.
INFO:teehr.evaluation.evaluation:Active catalog set to local.


CPU times: user 6.26 ms, sys: 222 μs, total: 6.48 ms
Wall time: 105 ms


In [7]:
# The evaluation starts on the local catalog; switch to the remote one so the
# following queries and writes target the remote warehouse.
ev.set_active_catalog("remote")

# Echo the active catalog to confirm which warehouse and namespace are in use.
ev.active_catalog

INFO:teehr.evaluation.evaluation:Active catalog set to remote.


RemoteCatalog(warehouse_dir='s3://dev-teehr-iceberg-warehouse/', catalog_name='iceberg', namespace_name='teehr', catalog_type='rest', catalog_uri='http://iceberg-rest:8181')

In [7]:
from teehr import DeterministicMetrics as dm
from teehr import SignatureMetrics as sm

In [8]:
# One row of metrics is produced per (location, configuration) pair.
metric_group_by = ["primary_location_id", "configuration_name"]

# Signature metrics (count, average) plus deterministic skill scores.
metrics_to_compute = [
    sm.Count(),
    sm.Average(),
    dm.RelativeBias(),
    dm.NashSutcliffeEfficiency(),
    dm.KlingGuptaEfficiency(),
]

# Build the query first, then materialise it as a Spark DataFrame.
metrics_query = ev.metrics.query(
    group_by=metric_group_by,
    include_metrics=metrics_to_compute,
)
sdf = metrics_query.to_sdf()

INFO:teehr.evaluation.tables.base_table:Loading files from iceberg.teehr.joined_timeseries.
INFO:teehr.evaluation.read:Reading files from iceberg.teehr.joined_timeseries.
INFO:teehr.evaluation.metrics:Calculating performance metrics.


In [9]:
# Expose the metrics DataFrame to Spark SQL under the name "metrics".
# createOrReplaceTempView keeps the cell re-runnable: plain createTempView
# raises if a view called "metrics" already exists in the Spark session.
sdf.createOrReplaceTempView("metrics")

In [10]:
# Attach every column of the matching location row to each metrics row by
# joining the "metrics" temp view with the catalog's locations table.
metrics_locations_sql = """
SELECT m.*, l.* FROM metrics m JOIN iceberg.teehr.locations l ON l.id = m.primary_location_id
"""

# `id` is the join key and therefore duplicates `primary_location_id`; drop it.
sdf = ev.spark.sql(metrics_locations_sql).drop("id")

In [11]:
# Preview the joined metrics/locations rows as a sanity check before writing.
sdf.show()

+-------------------+-------------------+--------+----------+-------------+-------------------------+----------------------+--------------------+--------------------+
|primary_location_id| configuration_name|   count|   average|relative_bias|nash_sutcliffe_efficiency|kling_gupta_efficiency|                name|            geometry|
+-------------------+-------------------+--------+----------+-------------+-------------------------+----------------------+--------------------+--------------------+
|      usgs-01306460|nwm30_retrospective|133267.0| 0.7172556|    1.4566711|               -50.731518|             -4.062736|CONNETQUOT BK NR ...|[01 01 00 00 00 B...|
|      usgs-01391000|nwm30_retrospective|268745.0| 1.2239597|  -0.44985306|                0.5154401|            0.47533485|Hohokus Brook at ...|[01 01 00 00 00 1...|
|      usgs-01398500|nwm30_retrospective|293517.0| 1.5704323|  -0.02060855|                  0.52862|            0.77016234|North Branch Rari...|[01 01 00 00 00 D...

In [12]:
# Persist the joined metrics to the active catalog's warehouse.
# NOTE(review): "create_or_replace" presumably overwrites an existing
# `sim_metrics_by_location` table -- confirm before running against shared data.
ev.write.to_warehouse(
    source_data=sdf,
    table_name="sim_metrics_by_location",
    write_mode="create_or_replace",
)

