In [None]:
import os
import pyrootutils

# Resolve the project root starting from the current working directory, using
# ".git" / "pyproject.toml" as marker files. `pythonpath=True` and `dotenv=True`
# are pyrootutils options (presumably: put the root on sys.path and load a
# .env file -- confirm against the pyrootutils documentation).
root = pyrootutils.setup_root(search_from=os.getcwd(), indicator=[".git", "pyproject.toml"], pythonpath=True, dotenv=True)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
#%matplotlib notebook


import pandas as pd
import seaborn as sns
import mlflow
import matplotlib

In [None]:
def search_mlflow(search_experiment_name, mlflow_dir=os.path.join(root, "logs", "mlflow", "mlruns")):
    """Load the runs of one or more MLflow experiments into a DataFrame.

    Calling this function also sets the global MLflow tracking URI to
    ``file://<mlflow_dir>``.

    Args:
        search_experiment_name: An experiment name, or a list of experiment
            names, to search for runs.
        mlflow_dir: Directory of the local MLflow file store. The default is
            built from the notebook-level ``root`` when the function is defined.

    Returns:
        The DataFrame returned by ``mlflow.search_runs`` with two changes:
        a ``model_name`` column is added ("ReWTS" where ``tags.ensemble`` is
        the string "True", "Global" otherwise), and ``tags.model`` is rewritten
        to a display name (e.g. "RNNModel" -> "LSTM").

    Raises:
        ValueError: If no runs are found for the requested experiments.
    """
    tags_model_to_name = dict(XGB="XGBoost", TCN="TCN", RNN="LSTM", Regression="ElasticNet")

    def _to_display_name(tag):
        # A missing tag is not a string (pandas fills it with NaN/None); leave
        # it untouched instead of crashing on `.replace`.
        if not isinstance(tag, str):
            return tag
        key = tag.replace("Model", "")
        # Unknown model types keep their (stripped) tag name rather than raising KeyError.
        return tags_model_to_name.get(key, key)

    if isinstance(search_experiment_name, str):
        search_experiment_name = [search_experiment_name]
    mlflow.set_tracking_uri(f"file://{mlflow_dir}")
    df = mlflow.search_runs(experiment_names=search_experiment_name)

    # Without any runs the tag columns used below do not exist; fail with a
    # message that points at the likely cause (wrong experiment name or directory).
    if df.empty:
        raise ValueError(
            f"No MLflow runs found for experiments {search_experiment_name} in '{mlflow_dir}'"
        )

    df['model_name'] = df['tags.ensemble'].apply(lambda x: "ReWTS" if x == "True" else "Global")
    df['tags.model'] = df['tags.model'].apply(_to_display_name)

    return df
    

def set_matplotlib_attributes(font_size=8, font="DejaVu Sans"):
    """Set the global matplotlib font size and font family.

    Args:
        font_size: Value written to the ``font.size`` rcParam.
        font: Value written to the ``font.family`` rcParam.
    """
    font_settings = {
        'font.size': font_size,
        'font.family': font,
    }
    matplotlib.rcParams.update(font_settings)

def set_figure_size(fig, column_span, height=None):
    """Resize ``fig`` to a fixed single- or double-column width.

    Args:
        fig: Object exposing ``set_size_inches(width, height)``, e.g. a
            matplotlib Figure.
        column_span: "single" (8.4 cm wide) or "double" (17.4 cm wide).
        height: Figure height in centimetres. When None, 5 cm is used for
            "double" and 10 cm for "single".

    Raises:
        ValueError: If ``column_span`` is neither "single" nor "double".
    """
    # Validate first so an invalid span is reported before any defaults are derived from it.
    if column_span == "single":
        width_cm = 8.4
    elif column_span == "double":
        width_cm = 17.4
    else:
        raise ValueError(f"column_span must be 'single' or 'double', got {column_span!r}")

    if height is None:
        height = 5 if column_span == "double" else 10

    cm = 1 / 2.54  # inches per centimetre; set_size_inches expects inches
    fig.set_size_inches(width_cm * cm, height * cm)

def save_figure(fig, path, formats=("pdf", "png")):
    """Save ``fig`` once per format as ``<path>.<format>``.

    Args:
        fig: Object exposing ``savefig`` (e.g. a matplotlib Figure).
        path: Output path without file extension. Missing parent directories
            are created.
        formats: File formats to write; each is used both as the file
            extension and as the ``format`` argument of ``savefig``.
    """
    directory = os.path.dirname(path)
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if directory:
        os.makedirs(directory, exist_ok=True)
    for fmt in formats:
        fig.savefig(f"{path}.{fmt}", format=fmt, bbox_inches="tight")

# Inspect Chunk Length and Lookback Experiment Results
This notebook helps inspect the results of the experiments that vary the data chunk length and the lookback length, and collects the material needed for the paper: it generates the figures and outputs the LaTeX table data.

## Chunk Length

In [None]:
fig_column_span = "single"
fig_height = 4
set_matplotlib_attributes()

# TODO: perhaps only legend in first figure?

# Configurable metric for y-axis
metric_name = "test_mse"
metric_column = f"metrics.{metric_name}"

# Column names
chunk_length_column = "params.datamodule/chunk_length"
chunk_length_plot_name = "Chunk length (days)"
model_name_column = "model_name"
metric_plot_name = " ".join(metric_name.replace("test_", "").split("_")).upper()

model_order = ["ReWTS", "Global"]

models = ["elastic_net", "xgboost-es", "tcn", "rnn"]
chunk_lengths = [720, 1008, 2016, 3024, 4032]
dataset = "electricity"

# Chunk lengths are counted in 10-minute samples: 6 per hour * 24 hours = one day
samples_per_day = 6 * 24

# One figure per model: mean metric versus chunk length, one line per model family
for model in models:
    search_experiment_name = [f"{dataset}_eval-it_{model}_chunk-length-{chunk_length}" for chunk_length in chunk_lengths]
    df = search_mlflow(search_experiment_name)

    # Mean of the metric per (chunk length, model family), with columns renamed for plotting
    grouped = (
        df.groupby([chunk_length_column, model_name_column])[metric_column]
        .mean()
        .reset_index()
        .rename(columns={metric_column: metric_plot_name, chunk_length_column: chunk_length_plot_name})
    )

    # The chunk length is converted to int so that the sort below is numeric
    grouped[chunk_length_plot_name] = grouped[chunk_length_plot_name].astype(int)
    grouped = grouped.sort_values(by=chunk_length_plot_name)

    # Draw on an explicit, fresh figure so that models never share axes,
    # regardless of whether the backend closes figures on plt.show()
    fig, ax = plt.subplots()
    sns.lineplot(data=grouped, x=chunk_length_plot_name, y=metric_plot_name, hue=model_name_column, marker="o", hue_order=model_order, ax=ax)
    set_figure_size(fig, column_span=fig_column_span, height=fig_height)

    # Set x-ticks only where there is data, labelled in days instead of samples
    unique_chunk_lengths = grouped[chunk_length_plot_name].unique()
    ax.set_xticks(unique_chunk_lengths)
    ax.set_xticklabels([f"{length / samples_per_day:.0f}" for length in unique_chunk_lengths])

    # Remove the legend title and frame
    legend = ax.legend_
    legend.set_title('')
    legend.set_frame_on(False)

    # Positional access: the title must not depend on the frame's index labels
    ax.set_title(df["tags.model"].iloc[0])

    fig_folder_name = f"chunk_lengths_{dataset}"
    fig_path = os.path.join(root, "figures", fig_folder_name, f"chunk_length_metrics_{model}")
    save_figure(fig, fig_path)
    plt.show()
    # Release the figure; a no-op if the backend already closed it in plt.show()
    plt.close(fig)

In [None]:
# `df` and `grouped` are those of the last model processed in the chunk-length loop.
# `search_mlflow` has already rewritten "tags.model" to its display name, so it is
# printed directly; mapping it a second time raised KeyError (e.g. for "LSTM").
print(df["tags.model"].iloc[0])
# Mean of the metric per model family, averaged over all chunk lengths
grouped.groupby("model_name")[metric_plot_name].mean()

## Lookback

In [None]:
lookback_lengths = [288, 432, 720, 1008, 1440]
models = ["xgboost-es", "tcn", "rnn", "elastic_net"]
dataset = "electricity"

search_experiment_name = [f"{dataset}_eval-it_{model}_lookback-{length}" for length in lookback_lengths for model in models]
df = search_mlflow(search_experiment_name)

fig_column_span = "single"
fig_height = 5
set_matplotlib_attributes()

# Configurable metric for y-axis
metric_name = "test_mse"
metric_column = f"metrics.{metric_name}"

# Column names (kept separate from the chunk-length names so that running this
# cell does not overwrite the configuration of the chunk-length cell)
lookback_column = "params.ensemble/lookback_data_length"
lookback_plot_name = "Lookback length (days)"
model_type_column = "tags.model"
metric_plot_name = " ".join(metric_name.replace("test_", "").split("_")).upper()

# Lookback lengths are counted in 10-minute samples: 6 per hour * 24 hours = one day
samples_per_day = 6 * 24

# Mean of the metric per (lookback length, model type), with columns renamed for plotting
grouped = (
    df.groupby([lookback_column, model_type_column])[metric_column]
    .mean()
    .reset_index()
    .rename(columns={metric_column: metric_plot_name, lookback_column: lookback_plot_name})
)

# The lookback length is converted to int so that the sort below is numeric
grouped[lookback_plot_name] = grouped[lookback_plot_name].astype(int)
grouped = grouped.sort_values(by=lookback_plot_name)

# Draw on an explicit, fresh figure instead of whatever the current pyplot axes happen to be
fig, ax = plt.subplots()
sns.lineplot(data=grouped, x=lookback_plot_name, y=metric_plot_name, hue=model_type_column, marker="o", ax=ax)
set_figure_size(fig, column_span=fig_column_span, height=fig_height)

# Set x-ticks only where there is data, labelled in days instead of samples
unique_lookback_lengths = grouped[lookback_plot_name].unique()
ax.set_xticks(unique_lookback_lengths)
ax.set_xticklabels([f"{length / samples_per_day:.0f}" for length in unique_lookback_lengths])

# Re-create the legend in two columns inside the axes, then drop its title and frame
legend = ax.legend(loc="lower left", bbox_to_anchor=(0.1, 0.125), ncol=2)
legend.set_title('')
legend.set_frame_on(False)

# The dataset name doubles as plot title and as part of the output path
plot_title = dataset.capitalize()
ax.set_title(plot_title)

fig_folder_name = f"lookback_length_{plot_title}"
fig_path = os.path.join(root, "figures", fig_folder_name, f"lookback_length_metrics_{plot_title}")
save_figure(fig, fig_path)