In [None]:
import teehr
from pathlib import Path
import shutil
import xarray as xr
import geopandas as gpd
from teehr.models.pydantic_table_models import (
    Attribute,
    Configuration,
    Variable
)

In [None]:
# Directory in which the evaluation will be created.
# Any previous contents are removed so the notebook starts from a clean slate.
TEST_STUDY_DIR = Path.home() / "temp" / "test_study"
shutil.rmtree(TEST_STUDY_DIR, ignore_errors=True)
TEST_STUDY_DIR.mkdir(parents=True, exist_ok=True)

# Directory where the test data is stored, located three levels above the
# imported teehr package file.
# NOTE(review): assumes teehr is imported from a source checkout that
# contains the tests/ directory -- confirm for your installation.
teehr_root_dir = Path(teehr.__file__).parent.parent.parent
TEST_DATA_DIR = teehr_root_dir / "tests/data/v0_3_test_study"

# Geospatial inputs
GEO_FILEPATH = TEST_DATA_DIR / "geo"
GEOJSON_GAGES_FILEPATH = GEO_FILEPATH / "gages.geojson"
CROSSWALK_FILEPATH = GEO_FILEPATH / "crosswalk.csv"

# Timeseries inputs
timeseries_dir = TEST_DATA_DIR / "timeseries"
PRIMARY_TIMESERIES_FILEPATH = timeseries_dir / "test_short_obs.parquet"
SECONDARY_TIMESERIES_FILEPATH = timeseries_dir / "test_short_fcast.parquet"

Timeseries, location crosswalks, and location attributes can also be loaded from NetCDF files. This example loads primary timeseries from a NetCDF file.

In [None]:
# Create a new Evaluation object.
# The study directory is removed and recreated first, so this cell always
# starts from an empty directory, even when re-run on its own.
shutil.rmtree(TEST_STUDY_DIR, ignore_errors=True)
TEST_STUDY_DIR.mkdir(parents=True, exist_ok=True)

ev = teehr.Evaluation(dir_path=TEST_STUDY_DIR)

# Enable logging
ev.enable_logging()

# Clone the template
# NOTE(review): presumably populates TEST_STUDY_DIR from TEEHR's evaluation
# template -- confirm against the TEEHR documentation.
ev.clone_template()

In [None]:
# The primary locations file with geometry
# (TEEHR currently only supports formats readable by geopandas).
# Note: despite the NETCDF_ prefix, this is a GeoParquet file that holds the
# locations used with the NetCDF timeseries example.
#
# The path is resolved from the location of the imported teehr package
# instead of a user-specific absolute path, so the notebook runs on any
# machine.
# NOTE(review): assumes teehr is imported from a source checkout that
# contains the tests/ directory -- confirm for your installation.
NETCDF_GEO_FILEPATH = Path(
    Path(teehr.__file__).parent.parent.parent,
    "tests", "data", "test_study", "geo", "summa_locations.parquet"
)
gpd.read_parquet(NETCDF_GEO_FILEPATH)

In [None]:
# Load the locations from the spatial file
ev.locations.load_spatial(in_path=NETCDF_GEO_FILEPATH)

# Register the configuration domain entry that the timeseries will reference
summa_configuration = Configuration(
    name="summa",
    type="primary",
    description="Summa Runoff Data"
)
ev.configurations.add(summa_configuration)

# Register the variable domain entry that the timeseries will reference
runoff_variable = Variable(name="runoff", long_name="runoff")
ev.variables.add(runoff_variable)

In [None]:
# Here is an example of a primary timeseries file in netcdf format.
#
# The path is resolved from the location of the imported teehr package
# instead of a user-specific absolute path, so the notebook runs on any
# machine.
# NOTE(review): assumes teehr is imported from a source checkout that
# contains the tests/ directory -- confirm for your installation.
NETCDF_TIMESERIES_FILEPATH = Path(
    Path(teehr.__file__).parent.parent.parent,
    "tests", "data", "test_study", "timeseries", "summa.example.nc"
)
# Look at the contents of the netcdf file to determine the field mapping
xr.open_dataset(NETCDF_TIMESERIES_FILEPATH)

The TEEHR timeseries table schema includes the following fields:

- reference_time
- value_time
- configuration_name
- unit_name
- variable_name
- value
- location_id

In [None]:
# Schema fields that have no counterpart in the netcdf file are given a
# single constant value.
summa_constant_field_values = {
    "unit_name": "m^3/s",
    "variable_name": "runoff",        # matches the variable added above
    "configuration_name": "summa",    # matches the configuration added above
    "reference_time": None            # no reference time is set for this data
}

# Fields in the netcdf file (keys) and the schema fields they map to (values)
summa_field_mapping = {
    "time": "value_time",
    "averageRoutedRunoff_mean": "value",
    "gru": "location_id"
}

In [None]:
# Load the netcdf timeseries data into the evaluation's primary timeseries,
# mapping the file's fields with `summa_field_mapping` and setting the
# constant fields from `summa_constant_field_values`.
ev.primary_timeseries.load_netcdf(
    in_path=NETCDF_TIMESERIES_FILEPATH,
    field_mapping=summa_field_mapping,
    constant_field_values=summa_constant_field_values
)

In [None]:
# Preview the first rows of the loaded primary timeseries
primary_timeseries_df = ev.primary_timeseries.to_pandas()
primary_timeseries_df.head()