In [None]:
import teehr
from pathlib import Path
import shutil

In [None]:
# Evaluation directory: <home>/temp/real_study.
# Start from a clean slate on every run: the old tree is removed on a
# best-effort basis (errors are ignored) and the directory is re-created.
TEST_STUDY_DIR = Path.home() / "temp" / "real_study"
shutil.rmtree(TEST_STUDY_DIR, ignore_errors=True)
TEST_STUDY_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Input files live under tests/data/two_locations, resolved three directory
# levels above the teehr package file.
# NOTE(review): this assumes teehr is imported from a source checkout that
# contains the tests/ folder — confirm for pip-installed environments.
TEST_DATA = Path(teehr.__file__).parent.parent.parent / "tests/data/two_locations/"
LOCATIONS = TEST_DATA / "two_locations.parquet"
XWALKS = TEST_DATA / "two_crosswalks.parquet"
LOCATION_ATTRS = TEST_DATA / "two_location_attributes.parquet"

In [None]:
# Create an Evaluation object that uses TEST_STUDY_DIR as its directory
ev = teehr.Evaluation(dir_path=TEST_STUDY_DIR)

# Turn on logging for this evaluation
# (log destination/level are not shown here — see the teehr docs)
ev.enable_logging()

In [None]:
# Clone the evaluation template
# (presumably populates the evaluation directory with the expected
# structure — confirm against the teehr documentation)
ev.clone_template()

In [None]:
# Load the location data from the LOCATIONS parquet file
ev.locations.load_spatial(in_path=LOCATIONS)

In [None]:
# Display the locations loaded in the previous cell via to_geopandas()
# (no timeseries are loaded here; this is only a sanity check of the load)
ev.locations.to_geopandas()

In [None]:
# Fetch USGS streamflow for the window 2000-10-01 .. 2012-09-30
ev.fetch.usgs_streamflow(start_date="2000-10-01", end_date="2012-09-30")

In [None]:
# Display the primary timeseries table via to_pandas()
ev.primary_timeseries.to_pandas()

In [None]:
# Read the location crosswalk table from the XWALKS parquet file
ev.location_crosswalks.load_parquet(in_path=XWALKS)

In [None]:
# Fetch NWM retrospective ("nwm30") streamflow at points for the same
# 2000-10-01 .. 2012-09-30 window used for the USGS fetch.
ev.fetch.nwm_retrospective_points(
    nwm_version="nwm30", variable_name="streamflow",
    start_date="2000-10-01", end_date="2012-09-30",
)

In [None]:
# Display the secondary timeseries table via to_pandas()
ev.secondary_timeseries.to_pandas()

In [None]:
from teehr import Attribute
import duckdb

In [None]:
# Collect the distinct attribute names found in the location-attributes
# parquet file and wrap each one in a teehr Attribute.
# The path is interpolated into a SQL string literal, so single quotes are
# doubled to keep the statement valid for paths that contain an apostrophe.
attrs_path_sql = str(LOCATION_ATTRS).replace("'", "''")
df = duckdb.query(
    f"SELECT DISTINCT attribute_name FROM read_parquet('{attrs_path_sql}');"
).to_df()

# Every attribute is typed "categorical" and reuses its name as description.
attrs_list = [
    Attribute(name=attr_name, type="categorical", description=attr_name)
    for attr_name in list(df["attribute_name"])
]

In [None]:
# Register the Attribute objects built above (attrs_list) with the evaluation
ev.attributes.add(attrs_list)

In [None]:
# Load the location attributes from the LOCATION_ATTRS parquet file.
# field_mapping pairs "attribute_value" with "value"
# (presumably source column -> target field; confirm in the teehr docs).
ev.location_attributes.load_parquet(
    LOCATION_ATTRS,
    field_mapping={"attribute_value": "value"},
)

In [None]:
# Build the joined timeseries table.
# Both optional steps are switched on: add_attrs and execute_udf
# (presumably attribute joining and user-defined functions — confirm).
ev.joined_timeseries.create(
    add_attrs=True,
    execute_udf=True,
)

In [None]:
# Display the joined timeseries table via to_pandas()
ev.joined_timeseries.to_pandas()