# Hyperparameter Tuning

Determine the optimal combination of segments and periods for time series aggregation.

Author: Leander Kotzur

In [None]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio

import tsam
from tsam import ClusterConfig
from tsam.tuning import find_pareto_front

# Make "notebook" the default Plotly renderer for every figure shown below.
pio.renderers.default = "notebook"

## Input data

Read the time series from `testdata.csv` with pandas and rename the columns to descriptive names.

In [None]:
# Map the short column headers of the CSV to descriptive variable names.
column_names = {
    "T": "Temperature",
    "Load": "Demand",
    "Wind": "Wind",
    "GHI": "Solar",
}

# The first CSV column is used as the index.
raw = pd.read_csv("testdata.csv", index_col=0).rename(columns=column_names)

# Length of one period in hours; with 24, each period is treated as one day.
period_hours = 24

Plot the original data

In [None]:
# Heatmaps of the unmodified input data, split into periods of `period_hours`.
tsam.plot.heatmaps(
    raw,
    title="Original Data",
    period_hours=period_hours,
)

## Find Pareto-optimal aggregations

Use `find_pareto_front()` to explore the Pareto-optimal combinations of periods and segments.

In [None]:
# Clustering setup shared by every aggregation evaluated during the search.
cluster_config = ClusterConfig(method="hierarchical", representation="duration")

# max_timesteps is kept small so the demo runs quickly;
# use 8760 for a full exploration.
pareto_results = find_pareto_front(
    raw,
    period_hours=period_hours,
    max_timesteps=100,
    cluster=cluster_config,
    n_jobs=-1,
)

Show the final result, i.e. the last entry returned by `find_pareto_front()`.

In [None]:
# Summarise the last entry of the Pareto results in a single line.
last_result = pareto_results[-1]
summary = (
    f"Final: {last_result.optimal_n_periods} periods, "
    f"{last_result.optimal_n_segments} segments, "
    f"RMSE: {last_result.optimal_rmse:.4f}"
)
print(summary)

In [None]:
# Rebuild a full-length time series from the best aggregation of the last
# result and plot it with the original data passed as reference.
best_aggregation = last_result.best_result
reconstructed = best_aggregation.reconstruct()

tsam.plot.heatmaps(
    reconstructed,
    title="Reconstructed Data",
    period_hours=period_hours,
    reference_data=raw,
)

## Animated visualization

Animate through all Pareto-optimal aggregations to visualize the trade-off between compression and accuracy.

In [None]:
n_vars = len(raw.columns)
n_days = len(raw) // period_hours

# One image stack and one slider label per Pareto result, walked in reverse
# order of `pareto_results`.
frames_data = []
labels = []
for entry in reversed(pareto_results):
    n_periods = entry.optimal_n_periods
    n_segments = entry.optimal_n_segments

    # Percentage of the original timesteps that the aggregation removes.
    reduction = round((1 - n_segments * n_periods / len(raw)) * 100, 1)
    labels.append(f"{reduction}% ({n_periods}p x {n_segments}s)")

    # Rows are split into (n_days, period_hours, n_vars) and the axes are
    # then reversed to (n_vars, period_hours, n_days).
    values = entry.best_result.reconstruct().values
    cube = values.reshape(n_days, period_hours, n_vars)
    frames_data.append(cube.transpose(2, 1, 0))

# 4D array with shape (frames, variables, hours, days).
img_stack = np.stack(frames_data)

In [None]:
# Axis 0 of img_stack drives the animation, axis 1 becomes one facet each.
axis_labels = {"x": "Day", "y": "Hour", "facet_col": "Variable"}
fig = px.imshow(
    img_stack,
    animation_frame=0,
    facet_col=1,
    aspect="auto",
    color_continuous_scale="RdYlBu_r",
    labels=axis_labels,
    title="Time Series Aggregation",
)

# Replace the slider step labels with the compression labels built for
# each frame.
for slider_step, frame_label in zip(fig.layout.sliders[0].steps, labels):
    slider_step["label"] = frame_label


def _use_variable_name(annotation):
    """Swap the facet index after the last "=" for the matching column name."""
    column_index = int(annotation.text.split("=")[-1])
    return annotation.update(text=raw.columns[column_index])


fig.for_each_annotation(_use_variable_name)
fig.update_layout(height=400)
fig.show()

## Save results

In [None]:
# One table row per Pareto result: its segment count, period count and RMSE.
pareto_df = pd.DataFrame(
    [
        {
            "segments": r.optimal_n_segments,
            "periods": r.optimal_n_periods,
            "rmse": r.optimal_rmse,
        }
        for r in pareto_results
    ]
)

# Neither DataFrame.to_csv nor Figure.write_html creates missing parent
# directories, so make sure the output folder exists before writing.
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)

pareto_df.to_csv(os.path.join(results_dir, "paretoOptimalAggregation.csv"))
fig.write_html(os.path.join(results_dir, "animation.html"))