### TEMPLATE

In [1]:
from teehr import Evaluation
from pathlib import Path

In [2]:
# Directory (under the current user's home folder) where the evaluation will
# be created; it is created on first run and reused afterwards.
TEST_STUDY_DIR = Path.home() / "temp" / "real_study"
TEST_STUDY_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Create an Evaluation object rooted at the study directory.
# NOTE(review): the name `eval` shadows Python's built-in `eval()`; it is kept
# because the remaining cells in this notebook reference it by that name.
eval = Evaluation(dir_path=TEST_STUDY_DIR)

# Enable logging
eval.enable_logging()

In [None]:
# Convert the joined timeseries table to pandas and display it as the cell output.
eval.joined_timeseries.to_pandas()

In [5]:
from teehr import Metrics as metrics

In [None]:
# Query KGE, NSE and relative bias, grouped and ordered by location and month,
# and display the result as a pandas DataFrame.
eval.metrics.query(
    order_by=["primary_location_id", "month"],
    group_by=["primary_location_id", "month"],
    include_metrics=[
        metrics.KlingGuptaEfficiency(),
        metrics.NashSutcliffeEfficiency(),
        metrics.RelativeBias()
    ]
).to_pandas()

In [7]:
# Field enumeration of the joined timeseries table.
# NOTE(review): `jt_fields` is not referenced by any later cell visible here —
# TODO confirm whether it is still needed.
jt_fields = eval.joined_timeseries.field_enum()

In [None]:
# Same three metrics as the monthly query, but grouped and ordered by
# location only; displayed as a pandas DataFrame.
eval.metrics.query(
    order_by=["primary_location_id"],
    group_by=["primary_location_id"],
    include_metrics=[
        metrics.KlingGuptaEfficiency(),
        metrics.NashSutcliffeEfficiency(),
        metrics.RelativeBias()
    ]
).to_pandas()

In [9]:
from teehr.models.metrics.bootstrap_models import Bootstrappers

In [10]:
# Define a bootstrapper with custom parameters.
boot = Bootstrappers.CircularBlock(
    seed=50,
    reps=500,
    block_size=10,
    quantiles=[0.05, 0.95]
)
# KGE evaluated through the bootstrapper; its output field is renamed,
# presumably so it stays distinct from the plain KGE below — TODO confirm.
kge = metrics.KlingGuptaEfficiency(bootstrap=boot)
kge.output_field_name = "kge_bootstrap"

# Request both the bootstrapped KGE and the plain (non-bootstrapped) KGE.
include_metrics = [kge, metrics.KlingGuptaEfficiency()]

In [None]:
# Run the metrics listed in `include_metrics` per location and convert the
# result with to_geopandas(); the last line displays it.
metrics_gdf = eval.metrics.query(
    include_metrics=include_metrics,
    group_by=["primary_location_id"],
    order_by=["primary_location_id"]
).to_geopandas()
metrics_gdf

In [12]:
from pyspark.sql.functions import avg, max

In [None]:
# Compute the metrics per location and month, then use Spark to average the
# monthly relative bias for each location and bring the result into pandas.
mdf = (
    eval.metrics.query(
        order_by=["primary_location_id", "month"],
        group_by=["primary_location_id", "month"],
        include_metrics=[
            metrics.KlingGuptaEfficiency(),
            metrics.NashSutcliffeEfficiency(),
            metrics.RelativeBias(),
        ],
    )
    .to_sdf()
    .groupBy("primary_location_id")
    .agg(avg("relative_bias").alias("relative_bias_avg"))
    .toPandas()
)
mdf

In [14]:
# This does not work: chaining a second `.query()` onto the result of the first.
# eval.metrics.query(
#     order_by=["primary_location_id", "month"],
#     group_by=["primary_location_id", "month"],
#     include_metrics=[
#         metrics.KlingGuptaEfficiency(),
#         metrics.NashSutcliffeEfficiency(),
#         metrics.RelativeBias()
#     ]
# ).query(
#     order_by=["primary_location_id"],
#     group_by=["primary_location_id"],
#     include_metrics=[
#         metrics.PrimaryAverage(
#             input_field_names=["relative_bias"],
#         )
#     ]
# ).to_pandas()

In [None]:
# Average primary value per location and month, ordered by location then month.
(
    eval.joined_timeseries.to_sdf()
    .groupBy("primary_location_id", "month")
    .agg(avg("primary_value").alias("value_avg"))
    .orderBy("primary_location_id", "month")
    .toPandas()
)

In [None]:
# Two-stage aggregation: first the average primary value per location and
# month, then the largest of those monthly averages per location.
# NOTE: `max` here is the pyspark.sql.functions.max imported earlier in this
# notebook, not Python's built-in max().
(
    eval.joined_timeseries.to_sdf()
    .groupBy("primary_location_id", "month").agg(avg("primary_value").alias("value_avg"))
    .groupBy("primary_location_id").agg(max("value_avg").alias("max_value_avg"))
    .toPandas()
)

### TESTING

In [None]:
from teehr import Metrics as metrics

import itertools
from math import pi
import random


from bokeh.palettes import Turbo256

from bokeh.plotting import show, figure
from bokeh.io import output_notebook
# Configure Bokeh so that show() renders plots inline in the notebook output.
output_notebook()

In [None]:
# Load the primary timeseries table into pandas; the last line displays it.
df_raw = eval.primary_timeseries.to_pandas()
df_raw

In [21]:
# determine bins for unique metadata
def timeseries_unique_values(df):
    """Collect the distinct values found in every column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    dict
        Maps each column label to a list of that column's unique values,
        in the order returned by ``Series.unique()``.
    """
    return {
        column_name: df[column_name].unique().tolist()
        for column_name in df.columns.to_list()
    }

# create plot schema
def timeseries_default_schema(df):
    """Build the default plotting schema for a timeseries frame.

    For every distinct ``variable_name`` the schema lists each
    (configuration_name, location_id) pairing formed from the unique
    configurations and unique locations present for that variable.

    Only the ``configuration_name`` and ``location_id`` columns are scanned
    for unique values. Scanning every column (e.g. ``value``) is unnecessary
    work and fails when an unrelated column holds unhashable objects.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``variable_name``, ``configuration_name``
        and ``location_id``.

    Returns
    -------
    dict
        Maps each variable name to a list of
        ``(configuration_name, location_id)`` tuples (the Cartesian product
        of the unique values of the two columns for that variable).
    """
    # init empty dict to store variable-specific plotting combinations
    schema = {}

    # one schema entry (i.e. one plot) per unique variable name
    for variable in df['variable_name'].unique().tolist():
        df_variable = df[df['variable_name'] == variable]

        # add reference time down the line
        configurations = df_variable['configuration_name'].unique().tolist()
        locations = df_variable['location_id'].unique().tolist()

        schema[variable] = list(itertools.product(configurations, locations))

    return schema

# creates and displays plot
def timeseries_generate_plot(schema, df, variable):
    """Build and display one Bokeh line plot for a single variable.

    One line is drawn per entry of ``schema[variable]``; each line gets a
    randomly chosen colour from the Turbo256 palette and a legend entry that
    can be clicked to hide or show it.

    Parameters
    ----------
    schema : dict
        Maps variable names to sequences whose items hold the configuration
        name at index 0 and the location id at index 1.
    df : pandas.DataFrame
        Rows to plot. Columns read: ``unit_name``, ``configuration_name``,
        ``location_id``, ``value_time`` and ``value``.
    variable : str
        Key into ``schema``; also used in the y-axis label.

    Returns
    -------
    None
        The plot is displayed via ``show()``.
    """
    series_keys = schema[variable]

    # Show every unit present in the axis label so mixed units are visible
    # rather than silently labelled with the first one; an empty frame gives
    # an empty unit label instead of raising IndexError.
    unique_units = df['unit_name'].unique().tolist()
    unit_label = ", ".join(str(unit) for unit in unique_units)

    # create color palette
    palette = Turbo256
    palette_size = len(palette)
    series_count = len(series_keys)
    if series_count <= palette_size:
        # distinct colours for every series, drawn from the whole palette
        color_indices = random.sample(range(palette_size), series_count)
    else:
        # more series than colours: random.sample would raise ValueError,
        # so allow colours to repeat
        color_indices = random.choices(range(palette_size), k=series_count)

    # init plot
    p = figure(title="Click legend entry to toggle display of timeseries",
               y_axis_label="{} [{}]".format(variable, unit_label),
               x_axis_label="Datetime",
               x_axis_type='datetime',
               sizing_mode="stretch_width",
               tools=['xwheel_zoom', 'reset'],
               height=800)

    # extract timeseries from dataframe and add to plot
    for combo, color_index in zip(series_keys, color_indices):
        series = df[(df['configuration_name'] == combo[0]) & (df['location_id'] == combo[1])]
        p.line(series.value_time,
               series.value,
               legend_label="{} - {}".format(combo[0], combo[1]),
               line_width=1,
               color=palette[color_index])

    # format xaxis
    p.xaxis.major_label_orientation = pi/4
    p.xaxis.axis_label_text_font_size = '14pt'
    p.xaxis.axis_label_text_font_style = 'bold'
    p.xaxis.major_label_text_font_size = '12pt'

    # format yaxis
    p.yaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_style = 'bold'
    p.yaxis.major_label_text_font_size = '12pt'

    # format title
    p.title.text_font_size = '12pt'

    # format legend
    p.legend.location = 'top_right'
    p.legend.label_text_font_size = '14pt'
    p.legend.border_line_width = 1
    p.legend.border_line_color = 'black'
    p.legend.border_line_alpha = 1.0
    p.legend.background_fill_color = 'white'
    p.legend.background_fill_alpha = 1.0
    p.legend.click_policy = 'hide'

    # display plot
    show(p)

    return

# determines how many plots to generate -- main method (i.e. calls all other functions)
def timeseries_plot(df):
    """Plot every variable found in ``df``, one figure per variable.

    Builds the default schema with ``timeseries_default_schema`` and, for
    each variable in it, passes the matching rows of ``df`` to
    ``timeseries_generate_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Timeseries frame; filtered here on its ``variable_name`` column.

    Returns
    -------
    None
    """
    # default plotting schema (used if no args are provided)
    plot_schema = timeseries_default_schema(df=df)

    # one plot per schema key, i.e. per unique variable
    for variable_name in plot_schema:
        # keep only the rows that belong to this variable
        rows_for_variable = df[df['variable_name'] == variable_name]
        timeseries_generate_plot(
            schema=plot_schema,
            df=rows_for_variable,
            variable=variable_name,
        )


In [None]:
# call plotting function: draws one figure per unique variable_name in df_raw
timeseries_plot(df=df_raw)
