In [None]:
from teehr import Evaluation
from pathlib import Path

In [None]:
# Directory in which the evaluation will be created: <home>/temp/real_study.
# It is created up front (including missing parents); an existing directory
# is left untouched.
TEST_STUDY_DIR = Path.home() / "temp" / "real_study"
TEST_STUDY_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Create an Evaluation object pointing at the study directory created above.
# NOTE(review): the name `eval` shadows Python's built-in eval(); it is kept
# because the remaining cells of this notebook reference it.
eval = Evaluation(dir_path=TEST_STUDY_DIR)

# Enable logging
eval.enable_logging()

In [None]:
from teehr import Metrics as metrics

In [None]:
# KGE, NSE and relative bias grouped (and ordered) by primary location and
# month; the result of to_pandas() is the cell's displayed output.
(
    eval.metrics
    .query(
        include_metrics=[
            metrics.KlingGuptaEfficiency(),
            metrics.NashSutcliffeEfficiency(),
            metrics.RelativeBias(),
        ],
        group_by=["primary_location_id", "month"],
        order_by=["primary_location_id", "month"],
    )
    .to_pandas()
)

In [None]:
# Result of joined_timeseries.field_enum() (presumably an enumeration of the
# fields available on the joined timeseries table — TODO confirm).
# NOTE(review): jt_fields is not referenced by any other cell visible here.
jt_fields = eval.joined_timeseries.field_enum()

In [None]:
# KGE, NSE and relative bias grouped (and ordered) by primary location only;
# the result of to_pandas() is the cell's displayed output.
(
    eval.metrics
    .query(
        include_metrics=[
            metrics.KlingGuptaEfficiency(),
            metrics.NashSutcliffeEfficiency(),
            metrics.RelativeBias(),
        ],
        group_by=["primary_location_id"],
        order_by=["primary_location_id"],
    )
    .to_pandas()
)

In [None]:
from teehr.models.metrics.bootstrap_models import Bootstrappers

In [None]:
# Define a circular-block bootstrapper with custom parameters.
# A fixed seed is passed so repeated runs use the same value; the remaining
# arguments are presumably the number of bootstrap replicates, the block
# length and the quantiles to report — TODO confirm against the TEEHR docs.
boot = Bootstrappers.CircularBlock(
    seed=50,
    reps=500,
    block_size=10,
    quantiles=[0.05, 0.95]
)
# KGE metric configured with the bootstrapper; its output field name is
# overridden after construction (presumably so it does not clash with the
# plain KGE metric included below — verify).
kge = metrics.KlingGuptaEfficiency(bootstrap=boot)
kge.output_field_name = "kge_bootstrap"

# Request both the bootstrapped KGE and a default (non-bootstrapped) KGE.
include_metrics = [kge, metrics.KlingGuptaEfficiency()]

In [None]:
# Run the metrics listed in `include_metrics` per primary location and convert
# the result with to_geopandas(); the trailing expression displays it.
metrics_gdf = (
    eval.metrics
    .query(
        group_by=["primary_location_id"],
        order_by=["primary_location_id"],
        include_metrics=include_metrics,
    )
    .to_geopandas()
)
metrics_gdf

In [None]:
from pyspark.sql.functions import avg, max

In [None]:
# Average the monthly relative bias per primary location.
# Metrics are first computed per (location, month); the Spark DataFrame from
# to_sdf() is then re-grouped by location and "relative_bias" is averaged.
# A Spark groupBy/agg does not preserve the ordering requested in query(),
# so the aggregated rows are sorted explicitly before conversion to pandas
# to keep the displayed table deterministic.
mdf = (
    eval.metrics
    .query(
        order_by=["primary_location_id", "month"],
        group_by=["primary_location_id", "month"],
        include_metrics=[
            metrics.KlingGuptaEfficiency(),
            metrics.NashSutcliffeEfficiency(),
            metrics.RelativeBias(),
        ],
    )
    .to_sdf()
    .groupBy("primary_location_id")
    .agg(avg("relative_bias").alias("relative_bias_avg"))
    .orderBy("primary_location_id")
    .toPandas()
)
mdf

In [None]:
# Attempt to aggregate the monthly metrics by chaining a second query() onto
# the result of the first. The original author notes that this does not work,
# so the attempt is wrapped in try/except: the failure is reported instead of
# halting a "Restart Kernel -> Run All", and the cells below still execute.
# NOTE(review): the exact exception type is not known from this notebook,
# hence the broad `except Exception` — narrow it once confirmed.
try:
    chained_metrics_df = (
        eval.metrics
        .query(
            order_by=["primary_location_id", "month"],
            group_by=["primary_location_id", "month"],
            include_metrics=[
                metrics.KlingGuptaEfficiency(),
                metrics.NashSutcliffeEfficiency(),
                metrics.RelativeBias(),
            ],
        )
        .query(
            order_by=["primary_location_id"],
            group_by=["primary_location_id"],
            include_metrics=[
                metrics.PrimaryAverage(
                    input_field_names=["relative_bias"],
                )
            ],
        )
        .to_pandas()
    )
except Exception as err:
    chained_metrics_df = None
    print(f"Chained metrics query failed: {type(err).__name__}: {err}")

# Displays the result if chaining succeeds; shows nothing when it is None.
chained_metrics_df

In [None]:
# Mean of "primary_value" for every (location, month) pair in the joined
# timeseries, sorted by location then month and displayed as the cell output.
(
    eval.joined_timeseries
    .to_sdf()
    .groupBy("primary_location_id", "month")
    .agg(avg("primary_value").alias("value_avg"))
    .orderBy("primary_location_id", "month")
    .toPandas()
)

In [None]:
# Largest monthly mean of "primary_value" per primary location:
#   1. average "primary_value" for every (location, month) pair,
#   2. take the maximum of those monthly averages for each location.
# `avg` and `max` are the pyspark.sql.functions imported earlier in this
# notebook (this `max` is not Python's built-in max).
# The result is sorted by location so the displayed table is deterministic,
# matching the explicit ordering used in the preceding cell.
(
    eval.joined_timeseries
    .to_sdf()
    .groupBy("primary_location_id", "month")
    .agg(avg("primary_value").alias("value_avg"))
    .groupBy("primary_location_id")
    .agg(max("value_avg").alias("max_value_avg"))
    .orderBy("primary_location_id")
    .toPandas()
)