# Hyperparameter Tuning

Determine the optimal combination of segments and periods for time series aggregation.

Author: Leander Kotzur

In [None]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio

import tsam
from tsam import ClusterConfig
from tsam.tuning import find_pareto_front

# Use Plotly's "notebook" renderer as the default for the figures shown below.
pio.renderers.default = "notebook"

## Input data

Read the time series from `testdata.csv` with pandas.

In [None]:
# Load the test data (first CSV column becomes the index) and give the
# columns descriptive names in a single chained expression.
raw = pd.read_csv("testdata.csv", index_col=0).rename(
    columns={"T": "Temperature", "Load": "Demand", "Wind": "Wind", "GHI": "Solar"}
)

# Length of one period, used by every plot and aggregation call below.
period_hours = 24

Plot the original data

In [None]:
# Heatmap view of the unaggregated input data, split into periods of `period_hours`.
tsam.plot.heatmaps(raw, period_hours=period_hours, title="Original Data")

## Find Pareto-optimal aggregations

Use `find_pareto_front()` to explore the Pareto-optimal combinations of periods and segments.

In [None]:
# Clustering setup handed to the Pareto search: hierarchical clustering with
# the "duration" representation.
cluster_config = ClusterConfig(method="hierarchical", representation="duration")

# max_timesteps is limited for a faster demo (use 8760 for full exploration).
# NOTE(review): n_jobs=-1 presumably means "use all available cores" - confirm
# against the tsam.tuning documentation.
pareto_results = find_pareto_front(
    raw,
    period_hours=period_hours,
    max_timesteps=200,
    cluster=cluster_config,
    n_jobs=-1,
)

Visualize the Pareto front - the trade-off between compression and accuracy.

In [None]:
# Collect one row per Pareto entry: the period/segment counts, their product
# (the resulting number of timesteps) and the associated RMSE.
pareto_rows = []
for entry in pareto_results:
    n_periods = entry.optimal_n_periods
    n_segments = entry.optimal_n_segments
    pareto_rows.append(
        {
            "timesteps": n_periods * n_segments,
            "periods": n_periods,
            "segments": n_segments,
            "rmse": entry.optimal_rmse,
        }
    )
pareto_df = pd.DataFrame(pareto_rows)

# Line chart of RMSE over timesteps; the y axis is anchored at zero and the
# period/segment counts are shown on hover.
axis_titles = {"timesteps": "Timesteps (periods x segments)", "rmse": "RMSE"}
fig = px.line(
    pareto_df,
    x="timesteps",
    y="rmse",
    markers=True,
    labels=axis_titles,
    title="Pareto Front: Compression vs Accuracy",
    hover_data=["periods", "segments"],
    range_y=(0, None),
)
fig.show()

Show the last entry of the Pareto front and plot its reconstruction.

In [None]:
# Take the last entry of the Pareto front and print its key figures.
last_result = pareto_results[-1]
summary = (
    f"Final: {last_result.optimal_n_periods} periods, "
    f"{last_result.optimal_n_segments} segments, "
    f"RMSE: {last_result.optimal_rmse:.4f}"
)
print(summary)

In [None]:
# Rebuild a time series from the aggregation stored in the last Pareto entry.
reconstructed = last_result.best_result.reconstruct()
# Plot the reconstruction as heatmaps; the original data is passed as
# `reference_data` (presumably so both are drawn on a comparable scale -
# confirm against the tsam.plot documentation).
tsam.plot.heatmaps(
    reconstructed,
    reference_data=raw,
    period_hours=period_hours,
    title="Reconstructed Data",
)

## Animated visualization

Animate through all Pareto-optimal aggregations to visualize the trade-off between compression and accuracy.

In [None]:
# Dimensions of each animation frame: one column per period and one band of
# `period_hours` rows per variable.
n_days = len(raw) // period_hours
n_vars = len(raw.columns)

# Column-wise min-max scaling parameters, taken from the original data so
# every frame is normalized against the same reference.
raw_min = raw.min()
raw_range = raw.max() - raw_min

frames_data = []
labels = []
for result in reversed(pareto_results):
    p = result.optimal_n_periods
    s = result.optimal_n_segments

    # Slider label: percentage of timesteps saved relative to the raw series,
    # followed by the period and segment counts.
    reduction_pct = round((1 - s * p / len(raw)) * 100, 1)
    labels.append(f"{reduction_pct}% ({p}p x {s}s)")

    # Normalize at DataFrame level, then fold the values into a
    # (variable, hour, day) cube.
    reconstructed = result.best_result.reconstruct()
    normalized = (reconstructed - raw_min) / raw_range
    cube = normalized.values.reshape(n_days, period_hours, n_vars)
    cube = cube.transpose(2, 1, 0)

    # Stack the per-variable (hour x day) panels vertically into one image.
    frames_data.append(cube.reshape(-1, n_days))

# Final array: (frame, variable * hour, day).
img_stack = np.stack(frames_data)

In [None]:
# Animated heatmap: axis 0 of img_stack selects the animation frame.
fig = px.imshow(
    img_stack,
    animation_frame=0,
    color_continuous_scale="RdYlBu_r",
    aspect="auto",
    labels={"x": "Day", "y": "Hour"},
    title="Time Series Aggregation",
)

# Replace the default slider step labels with the descriptive ones built
# together with the frames (one label per frame).
slider_steps = fig.layout.sliders[0].steps
for step, label in zip(slider_steps, labels):
    step["label"] = label

# One y tick per variable, placed in the middle of that variable's band of
# `period_hours` rows and labelled with the column name.
half_band = period_hours // 2
tickvals = [half_band + period_hours * band for band in range(n_vars)]
fig.update_yaxes(tickvals=tickvals, ticktext=list(raw.columns))
fig.update_layout(height=600, coloraxis_showscale=False)
fig.show()

## Save results

In [None]:
# Make sure the output directory exists first: neither DataFrame.to_csv nor
# Figure.write_html creates missing parent directories, so running the
# notebook without an existing "results" folder would otherwise fail here.
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)

# Persist the Pareto table and the interactive animation.
pareto_df.to_csv(os.path.join(results_dir, "paretoOptimalAggregation.csv"))
fig.write_html(os.path.join(results_dir, "animation.html"))