In [None]:
from teehr.fetching.nwm.nwm_points import nwm_to_parquet
from teehr import Evaluation

import tempfile
from pathlib import Path
from datetime import datetime

import pytest
import pandas as pd
from dask.distributed import Client

In [None]:
# Start a Dask distributed client using the library's default settings
# (no scheduler address or cluster options are passed here).
client = Client()
# Bare expression so the notebook renders the client as this cell's output.
client

In [None]:
# Root folder under which the "data/test_study" fixtures are looked up.
# Default is the parent of the current working directory; swap in the
# commented-out line below when working against a remote kernel.
ROOT_DIR = Path.cwd().parent  # if running locally
# ROOT_DIR = Path("/data/playground/teehr-test-data")  # if connected to a remote kernel

In [None]:
# Show the resolved root directory so it can be checked before paths are built.
ROOT_DIR

In [None]:
# Test-study fixtures are looked up under <ROOT_DIR>/data/test_study.
TEST_STUDY_DATA_DIR = Path(ROOT_DIR) / "data" / "test_study"

# Both input files used by this notebook sit in the "geo" subfolder.
GEO_DIR = TEST_STUDY_DATA_DIR / "geo"
GEO_GAGES_FILEPATH = GEO_DIR / "usgs_point_geometry.test.parquet"
CROSSWALK_FILEPATH = GEO_DIR / "usgs_nwm30_crosswalk.test.parquet"

### Test fetching NWM operational points within an Evaluation

In [None]:
# Build a throwaway Evaluation in a temporary directory, load the test
# locations and crosswalk, then fetch USGS streamflow and NWM operational
# point data into it.
with tempfile.TemporaryDirectory(prefix="teehr-") as tempdir:

    # Fetch windows, named up front so they are easy to adjust.
    usgs_start = datetime(2022, 2, 22)
    usgs_end = datetime(2022, 2, 23)
    # NOTE(review): the NWM window is in 2024 while the USGS window is in
    # 2022 -- confirm this mismatch is intended.
    nwm_start = datetime(2024, 2, 22)

    ev = Evaluation(dir_path=tempdir)
    ev.clone_template()

    # Load the gage geometries and the USGS-to-NWM crosswalk.
    ev.locations.load_spatial(in_path=GEO_GAGES_FILEPATH)
    ev.location_crosswalks.load_parquet(in_path=CROSSWALK_FILEPATH)

    ev.fetch.usgs_streamflow(start_date=usgs_start, end_date=usgs_end)

    ev.fetch.nwm_operational_points(
        nwm_configuration="analysis_assim",
        output_type="channel_rt",
        variable_name="streamflow",
        start_date=nwm_start,
        ingest_days=1,
        nwm_version="nwm30",
        prioritize_analysis_valid_time=True,
        t_minus_hours=[0],
        process_by_z_hour=False,
    )

    # Pull the secondary timeseries into pandas while the temporary
    # directory still exists (it is removed when the `with` block exits).
    ts_df = ev.secondary_timeseries.to_pandas()

### Test fetching NWM operational points independent of an Evaluation

In [None]:
# Read the crosswalk table and keep the piece of each secondary location id
# that follows the first "-" separator.
df = pd.read_parquet(CROSSWALK_FILEPATH)
secondary_id_parts = df["secondary_location_id"].str.split("-", expand=True)
nwm_ids = secondary_id_parts.iloc[:, 1].tolist()

In [None]:
# Fetch NWM point data straight to parquet, without an Evaluation, writing
# everything under a temporary directory that is removed afterwards.
with tempfile.TemporaryDirectory(prefix="teehr-") as tempdir:

    # Dates to fetch -- both must fall inside the range of one NWM version.
    nwm_version = "nwm30"
    start_date = datetime(2024, 1, 2)
    end_date = datetime(2024, 1, 2)
    # Inclusive day count between start_date and end_date.
    ingest_days = (end_date - start_date).days + 1

    # NWM configuration with its matching output type and variable:
    #   "channel_rt"   -> short range and all AnA configurations
    #   "channel_rt_X" -> medium_range_memX (e.g. channel_rt_1 for
    #                     medium_range_mem1)
    # Alternative pairing:
    #   configuration = "medium_range_mem1"
    #   output_type = "channel_rt_1"
    configuration = "analysis_assim"
    output_type = "channel_rt"
    variable_name = "streamflow"

    # Per-configuration working folders inside the temporary directory.
    json_dir = Path(tempdir, "kerchunk", configuration)
    output_parquet_dir = Path(tempdir, "parquet", configuration)

    # Run the teehr point-data fetch.
    nwm_to_parquet(
        configuration=configuration,
        output_type=output_type,
        variable_name=variable_name,
        start_date=start_date,
        ingest_days=ingest_days,
        location_ids=nwm_ids,
        json_dir=json_dir,
        output_parquet_dir=output_parquet_dir,
        nwm_version=nwm_version,
        data_source="GCS",
        kerchunk_method="local",
        process_by_z_hour=True,
        ignore_missing_file=True,
        overwrite_output=True,
    )

### Run all tests (currently fails due to path issues)

In [None]:
# ! pytest ../test_*