In [5]:
from teehr import Evaluation
from pathlib import Path

In [6]:
# Set a path to the directory where the evaluation will be created
TEST_STUDY_DIR = Path(Path().home(), "temp", "test_study")
# TEST_STUDY_DIR = Path("/home/sam/git/teehr/tests/data/v0_3_test_study")

# Create an Evaluation object
eval = Evaluation(dir_path=TEST_STUDY_DIR)

# Enable logging
eval.enable_logging()

In [7]:
ts_df = eval.primary_timeseries.load_parquet().to_pandas()
ts_df.head()

                                                                                

TypeError: PrimaryTimeseriesTable.load_parquet() missing 1 required positional argument: 'in_path'

In [8]:
# Pass in SQL format "where" clauses as chained filter calls.
# Note, no internal validation is done when filtering with SQL formatted strings.
ts_df = eval.primary_timeseries.filter(
    "value_time > '2022-01-01' and value_time < '2022-01-02'"
).filter(
    "location_id = 'gage-C'"
).to_pandas()
ts_df.head()

                                                                                

Unnamed: 0,reference_time,value_time,value,unit_name,location_id
0,NaT,2022-01-01 01:00:00,100.0,m^3/s,gage-C
1,NaT,2022-01-01 02:00:00,100.0,m^3/s,gage-C
2,NaT,2022-01-01 03:00:00,120.0,m^3/s,gage-C
3,NaT,2022-01-01 04:00:00,140.0,m^3/s,gage-C
4,NaT,2022-01-01 05:00:00,160.0,m^3/s,gage-C


In [9]:
# Pass in a list of dictionaries with "column", "operator", and "value" keys
# to chain filters.
ts_df = eval.primary_timeseries.filter(
    [
        {
            "column": "value_time",
            "operator": ">",
            "value": "2022-01-01",
        },
        {
            "column": "value_time",
            "operator": "<",
            "value": "2022-01-02",
        },
        {
            "column": "location_id",
            "operator": "=",
            "value": "gage-C",
        },
    ]
).to_pandas()
ts_df.head()

DEBUG:teehr.querying.filter_format:Validating and applying {'column': 'value_time', 'operator': '<', 'value': '2022-01-02'}
DEBUG:teehr.querying.filter_format:Filter: {"column":"value_time","operator":"<","value":"2022-01-02"}
DEBUG:teehr.querying.filter_format:Validating and applying {'column': 'location_id', 'operator': '=', 'value': 'gage-C'}
DEBUG:teehr.querying.filter_format:Filter: {"column":"location_id","operator":"=","value":"gage-C"}


Unnamed: 0,reference_time,value_time,value,unit_name,location_id
0,NaT,2022-01-01 01:00:00,100.0,m^3/s,gage-C
1,NaT,2022-01-01 02:00:00,100.0,m^3/s,gage-C
2,NaT,2022-01-01 03:00:00,120.0,m^3/s,gage-C
3,NaT,2022-01-01 04:00:00,140.0,m^3/s,gage-C
4,NaT,2022-01-01 05:00:00,160.0,m^3/s,gage-C


In [10]:
# Using the query method to filter the timeseries where filters are passed in as arguments.
ts_df = eval.primary_timeseries.query(
    filters=[
        "value_time > '2022-01-01'",
        "value_time < '2022-01-02'",
        "location_id = 'gage-C'"
    ]
).to_pandas()
ts_df.head()

INFO:teehr.evaluation.tables:Performing the query.
DEBUG:teehr.querying.filter_format:Validating and applying value_time > '2022-01-01'
DEBUG:teehr.querying.filter_format:Validating and applying value_time < '2022-01-02'
DEBUG:teehr.querying.filter_format:Validating and applying location_id = 'gage-C'


Unnamed: 0,reference_time,value_time,value,unit_name,location_id
0,NaT,2022-01-01 01:00:00,100.0,m^3/s,gage-C
1,NaT,2022-01-01 02:00:00,100.0,m^3/s,gage-C
2,NaT,2022-01-01 03:00:00,120.0,m^3/s,gage-C
3,NaT,2022-01-01 04:00:00,140.0,m^3/s,gage-C
4,NaT,2022-01-01 05:00:00,160.0,m^3/s,gage-C


In [11]:
ts_df = eval.primary_timeseries.query(
    filters=[
        {
            "column": "value_time",
            "operator": ">",
            "value": "2022-01-01",
        },
        {
            "column": "value_time",
            "operator": "<",
            "value": "2022-01-02",
        },
        {
            "column": "location_id",
            "operator": "=",
            "value": "gage-C",
        },
    ],
    order_by=["location_id", "value_time"]
).to_pandas()
ts_df.head()

INFO:teehr.evaluation.tables:Performing the query.
DEBUG:teehr.querying.filter_format:Validating and applying {'column': 'value_time', 'operator': '>', 'value': '2022-01-01'}
DEBUG:teehr.querying.filter_format:Filter: {"column":"value_time","operator":">","value":"2022-01-01"}
DEBUG:teehr.querying.filter_format:Validating and applying {'column': 'value_time', 'operator': '<', 'value': '2022-01-02'}
DEBUG:teehr.querying.filter_format:Filter: {"column":"value_time","operator":"<","value":"2022-01-02"}
DEBUG:teehr.querying.filter_format:Validating and applying {'column': 'location_id', 'operator': '=', 'value': 'gage-C'}
DEBUG:teehr.querying.filter_format:Filter: {"column":"location_id","operator":"=","value":"gage-C"}
DEBUG:teehr.querying.utils:Ordering DataFrame.
DEBUG:teehr.querying.utils:Parsing requested fields to a list of strings.
DEBUG:teehr.querying.utils:Validating requested fields.


Unnamed: 0,reference_time,value_time,value,unit_name,location_id
0,NaT,2022-01-01 01:00:00,100.0,m^3/s,gage-C
1,NaT,2022-01-01 02:00:00,100.0,m^3/s,gage-C
2,NaT,2022-01-01 03:00:00,120.0,m^3/s,gage-C
3,NaT,2022-01-01 04:00:00,140.0,m^3/s,gage-C
4,NaT,2022-01-01 05:00:00,160.0,m^3/s,gage-C


In [12]:
from teehr.models.filters import TimeseriesFilter
from teehr.models.filters import FilterOperators
fields = eval.primary_timeseries.field_enum()

In [10]:
ts_df = eval.primary_timeseries.query(
    filters=[
        TimeseriesFilter(
            column=fields.value_time,
            operator=FilterOperators.gt,
            value="2022-01-01",
        ),
        TimeseriesFilter(
            column=fields.value_time,
            operator=FilterOperators.lt,
            value="2022-01-02",
        ),
        TimeseriesFilter(
            column=fields.location_id,
            operator=FilterOperators.eq,
            value="gage-C",
        ),
]).to_pandas()
ts_df.head()

INFO:teehr.evaluation.tables:Performing the query.
DEBUG:teehr.querying.filter_format:Validating and applying column=<TimeseriesFields.value_time: 'value_time'> operator=<FilterOperators.gt: '>'> value='2022-01-01'
DEBUG:teehr.querying.filter_format:Filter: {"column":"value_time","operator":">","value":"2022-01-01"}
DEBUG:teehr.querying.filter_format:Validating and applying column=<TimeseriesFields.value_time: 'value_time'> operator=<FilterOperators.lt: '<'> value='2022-01-02'
DEBUG:teehr.querying.filter_format:Filter: {"column":"value_time","operator":"<","value":"2022-01-02"}
DEBUG:teehr.querying.filter_format:Validating and applying column=<TimeseriesFields.location_id: 'location_id'> operator=<FilterOperators.eq: '='> value='gage-C'
DEBUG:teehr.querying.filter_format:Filter: {"column":"location_id","operator":"=","value":"gage-C"}


Unnamed: 0,reference_time,value_time,value,unit_name,location_id


In [13]:
# Using the query method to filter the timeseries where filters are passed in as arguments.
# The result is returned as a Spark DataFrame, which is further "selected" and "ordered" before
# being converted to a pandas DataFrame.
ts_sdf = eval.primary_timeseries.query(
    filters=[
        "value_time > '2022-01-01'",
        "value_time < '2022-01-02'",
        "location_id = 'gage-C'"
    ]
).to_sdf()
ts_df = ts_sdf.select("value_time", "location_id", "value").orderBy("value").toPandas()
ts_df.head()

INFO:teehr.evaluation.tables:Performing the query.
DEBUG:teehr.querying.filter_format:Validating and applying value_time > '2022-01-01'
DEBUG:teehr.querying.filter_format:Validating and applying value_time < '2022-01-02'
DEBUG:teehr.querying.filter_format:Validating and applying location_id = 'gage-C'


Unnamed: 0,value_time,location_id,value
0,2022-01-01 23:00:00,gage-C,20.0
1,2022-01-01 22:00:00,gage-C,30.0
2,2022-01-01 21:00:00,gage-C,40.0
3,2022-01-01 20:00:00,gage-C,50.0
4,2022-01-01 19:00:00,gage-C,60.0
