In [None]:
from teehr import Evaluation
from pathlib import Path

In [None]:
# Directory that will hold the evaluation: <home>/temp/real_study.
# Missing parent directories are created, and an already existing
# directory is accepted, so this cell can be re-run safely.
TEST_STUDY_DIR = Path.home() / "temp" / "real_study"
TEST_STUDY_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Create an Evaluation object rooted at the study directory (TEST_STUDY_DIR).
# `ev` is the handle every later cell uses to reach tables and metrics.
ev = Evaluation(dir_path=TEST_STUDY_DIR)

# Enable logging on the evaluation (called without arguments, so whatever
# defaults the library applies are used).
ev.enable_logging()

In [None]:
# Display the evaluation's joined timeseries table as a pandas DataFrame.
# NOTE(review): this presumably pulls the whole table into local memory --
# confirm the table is small enough before running against a large study.
ev.joined_timeseries.to_pandas()

In [None]:
from teehr import Metrics as metrics

In [None]:
# Metrics evaluated for every (primary_location_id, month) group.
monthly_metrics = [
    metrics.KlingGuptaEfficiency(),
    metrics.NashSutcliffeEfficiency(),
    metrics.RelativeBias(),
]

# Group and order by the same two fields, then show the result as a
# pandas DataFrame (last expression of the cell).
monthly_result = ev.metrics.query(
    include_metrics=monthly_metrics,
    group_by=["primary_location_id", "month"],
    order_by=["primary_location_id", "month"],
)
monthly_result.to_pandas()

In [None]:
# Keep the field enumeration exposed by the joined timeseries table.
# NOTE(review): presumably an enum of the table's field names for building
# filters / group-bys; it is not referenced in the cells shown here --
# confirm it is still needed.
jt_fields = ev.joined_timeseries.field_enum()

In [None]:
# Metrics evaluated once per primary_location_id (no monthly split).
location_metrics = [
    metrics.KlingGuptaEfficiency(),
    metrics.NashSutcliffeEfficiency(),
    metrics.RelativeBias(),
]

# Group and order by location, then show the result as a pandas DataFrame.
location_result = ev.metrics.query(
    include_metrics=location_metrics,
    group_by=["primary_location_id"],
    order_by=["primary_location_id"],
)
location_result.to_pandas()

In [None]:
from teehr.models.metrics.bootstrap_models import Bootstrappers

In [None]:
# Circular-block bootstrapper with explicit settings: fixed seed, 500 reps,
# block size 10, and the 0.05 / 0.95 quantiles.
cb = Bootstrappers.CircularBlock(
    quantiles=[0.05, 0.95],
    block_size=10,
    reps=500,
    seed=50,
)

# KGE metric that uses the bootstrapper above; its output is written to the
# "kge_cb" field so it can sit next to other KGE variants in one query.
kge_cb = metrics.KlingGuptaEfficiency(
    bootstrap=cb,
    output_field_name="kge_cb",
)

In [None]:
# Gumboot bootstrapper with explicit settings: fixed seed, 500 reps, and
# the 0.05 / 0.95 quantiles.
gum = Bootstrappers.Gumboot(
    quantiles=[0.05, 0.95],
    reps=500,
    seed=50,
)

# KGE metric that uses the Gumboot bootstrapper; its output is written to
# the "kge_gum" field.
kge_gum = metrics.KlingGuptaEfficiency(
    bootstrap=gum,
    output_field_name="kge_gum",
)



In [None]:
# Metrics requested together in the per-location queries below: the two
# bootstrapped KGE variants plus a plain KGE with default settings.
include_metrics = [
    kge_cb,
    kge_gum,
    metrics.KlingGuptaEfficiency(),
]

In [None]:
# Evaluate the metrics in `include_metrics` once per location.
per_location_query = ev.metrics.query(
    group_by=["primary_location_id"],
    order_by=["primary_location_id"],
    include_metrics=include_metrics,
)

# Keep the pandas result and display it.
metrics_gdf = per_location_query.to_pandas()
metrics_gdf

In [None]:
# Run the per-location query and keep the result via to_sdf() (instead of
# pandas) so the "kge_cb" / "kge_gum" columns can be processed with
# pyspark.sql.functions in the next cell.
sdf = ev.metrics.query(
    group_by=["primary_location_id"],
    order_by=["primary_location_id"],
    include_metrics=include_metrics,
).to_sdf()

In [None]:
%%time
import pyspark.sql.functions as F
keys_cb = sdf.select(
    F.explode(F.map_keys(F.col("kge_cb"))),
).distinct()
cb_key_list = list(map(lambda row: row[0], keys_cb.collect()))

keys_gum = sdf.select(
    F.explode(F.map_keys(F.col("kge_gum")))
).distinct()
gum_key_list = list(map(lambda row: row[0], keys_gum.collect()))

cb_key_cols = list(map(lambda f: F.col("kge_cb").getItem(f).alias(str(f)), cb_key_list))
gum_key_cols = list(map(lambda f: F.col("kge_gum").getItem(f).alias(str(f)), gum_key_list))
sdf.select("primary_location_id", "kling_gupta_efficiency", *cb_key_cols, *gum_key_cols).toPandas()

In [None]:
# Maximum of the primary and of the secondary values, each written to its
# own named output field.
monthly_max_metrics = [
    metrics.Maximum(
        input_field_names=["primary_value"],
        output_field_name="primary_max",
    ),
    metrics.Maximum(
        input_field_names=["secondary_value"],
        output_field_name="secondary_max",
    ),
]

# One row per (primary_location_id, month), shown as a pandas DataFrame.
ev.metrics.query(
    include_metrics=monthly_max_metrics,
    group_by=["primary_location_id", "month"],
    order_by=["primary_location_id", "month"],
).to_pandas()

In [None]:
# Circular-block bootstrapper for the chained query below: fixed seed,
# 100 reps, block size 1, and the 0.05 / 0.95 quantiles.
# Note: this rebinds the notebook-level name `cb`.
cb = Bootstrappers.CircularBlock(
    quantiles=[0.05, 0.95],
    block_size=1,
    reps=100,
    seed=11,
)

# Stage 1: maxima of the primary and secondary values for every
# (primary_location_id, month) group.
monthly_maxima = ev.metrics.query(
    include_metrics=[
        metrics.Maximum(
            input_field_names=["primary_value"],
            output_field_name="primary_max",
        ),
        metrics.Maximum(
            input_field_names=["secondary_value"],
            output_field_name="secondary_max",
        ),
    ],
    group_by=["primary_location_id", "month"],
    order_by=["primary_location_id", "month"],
)

# Stage 2: chain a second query on the stage-1 result, computing the
# bootstrapped relative bias between the two maxima fields per location.
df = monthly_maxima.query(
    include_metrics=[
        metrics.RelativeBias(
            input_field_names=["primary_max", "secondary_max"],
            output_field_name="rb_cb",
            bootstrap=cb,
        )
    ],
    group_by=["primary_location_id"],
    order_by=["primary_location_id"],
).to_pandas()
df