# Hyperparameter Tuning

This notebook demonstrates how to automatically find optimal aggregation parameters using `find_optimal_combination`.

Instead of manually choosing the number of periods and segments, you can specify a target data reduction and let tsam find the best combination.

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import plotly.io as pio

import tsam
from tsam.tuning import find_optimal_combination

# Select "notebook" as plotly's default renderer for every figure displayed
# by the cells below.
pio.renderers.default = "notebook"

### Load test data

In [None]:
# Read the test CSV, using its first column as the index and asking pandas
# to parse that index as dates.
raw = pd.read_csv("testdata.csv", parse_dates=True, index_col=0)

# Report the table dimensions and the row count, then preview the first rows.
print(f"Shape: {raw.shape}", f"Timesteps: {len(raw)}", sep="\n")
raw.head()

### Find optimal combination for 2% data reduction

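The `find_optimal_combination` function searches for the best period/segment combination that achieves the target data reduction while minimizing RMSE. Here `data_reduction=0.02` means the aggregated data should keep roughly 2% of the original timesteps (periods × segments ≈ 2% of `len(raw)`), not that only 2% is removed.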

Use `n_jobs=-1` to run the search on all available CPUs, which speeds it up.

In [None]:
# Run the search for the period/segment combination that meets the target.
result = find_optimal_combination(
    raw,
    period_hours=24,
    data_reduction=0.02,  # Target: 2% of original timesteps
    show_progress=True,
    n_jobs=-1,  # Use all CPUs
)

# Pull the winning configuration into local names so the summary below
# does not repeat the same attribute expressions.
opt_periods = result.optimal_n_periods
opt_segments = result.optimal_n_segments
reduced_timesteps = opt_periods * opt_segments

print("\nOptimal configuration:")
print(f"  Periods: {opt_periods}")
print(f"  Segments: {opt_segments}")
print(f"  RMSE: {result.optimal_rmse:.4f}")
print(f"  Timesteps: {reduced_timesteps}")
# Share of the original row count that the aggregated representation keeps.
print(f"  Reduction: {reduced_timesteps / len(raw) * 100:.2f}%")

### View the search history

The tuning result includes the history of all tested configurations.

In [None]:
# Turn the recorded search history into a table and display it ordered by
# its "rmse" column, smallest values first.
history_df = pd.DataFrame(data=result.history)
history_df.sort_values(by="rmse", ascending=True)

### Use the optimal result

The `best_result` attribute contains the full `AggregationResult` for the optimal configuration.

In [None]:
# Keep a handle on the aggregation result of the optimal configuration.
best = result.best_result
typical_shape = best.typical_periods.shape

print(f"Typical periods shape: {typical_shape}")
# Heading (preceded by a blank line) followed by the accuracy object itself.
print("\nAccuracy metrics:", best.accuracy, sep="\n")

In [None]:
# Call reconstruct() on the optimal result; the returned data is what gets plotted.
reconstructed = best.reconstruct()

# Build the figure title from the optimal configuration before plotting.
heatmap_title = (
    f"Optimal: {result.optimal_n_periods} periods x {result.optimal_n_segments} segments"
)

# Heatmap of column "T"; the call stays last so its return value is displayed.
tsam.plot.heatmap(reconstructed, column="T", period_hours=24, title=heatmap_title)

### Compare different reduction targets

Let's see how the optimal configuration changes with different data reduction targets.

In [None]:
# Reduction targets to compare, each a fraction passed as `data_reduction`.
reductions = [0.01, 0.02, 0.05, 0.10]
# Maps a percentage label (e.g. "2%") to the tuning result for that target.
results_comparison = {}

for reduction in reductions:
    tuning_result = find_optimal_combination(
        raw,
        data_reduction=reduction,
        period_hours=24,
        n_jobs=-1,
        show_progress=False,
    )
    # Build the label once and reuse it for both the dict key and the message.
    # `:g` formatting avoids the truncation of int(reduction * 100), which
    # would turn 0.29 into "28%" (0.29 * 100 == 28.999999999999996) and would
    # collapse fractional targets such as 0.015 onto the "1%" key.
    label = f"{reduction * 100:g}%"
    results_comparison[label] = tuning_result
    print(
        f"{label} reduction: {tuning_result.optimal_n_periods} periods x {tuning_result.optimal_n_segments} segments, RMSE={tuning_result.optimal_rmse:.4f}"
    )

In [None]:
# Series to compare: the original data first, then one reconstruction per
# reduction target, keyed by the same labels as `results_comparison`.
comparison_data = {
    "Original": raw,
    **{
        target_label: tuned.best_result.reconstruct()
        for target_label, tuned in results_comparison.items()
    },
}

# Duration-curve comparison of the "Load" column; the call stays last so its
# return value is displayed.
tsam.plot.compare(
    comparison_data,
    title="Duration Curve Comparison - Different Reduction Targets",
    plot_type="duration_curve",
    column="Load",
)

### Helper functions

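Use `find_periods_for_reduction` and `find_segments_for_reduction` to calculate the maximum number of periods (for a fixed number of segments) or segments (for a fixed number of periods) that still meets a specific reduction target.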

In [None]:
from tsam.tuning import find_periods_for_reduction, find_segments_for_reduction

n_timesteps = len(raw)
target_reduction = 0.01  # 1% of original
fixed_segments = 24  # segment count held constant in the first query
fixed_periods = 8  # period count held constant in the second query

# The messages below are derived from the same variables that are passed to
# the helpers, so changing a value above cannot leave a stale number in the
# printed text.
target_label = f"{target_reduction * 100:g}%"

# How many periods can we have with `fixed_segments` segments?
max_periods = find_periods_for_reduction(
    n_timesteps, n_segments=fixed_segments, data_reduction=target_reduction
)
print(
    f"With {fixed_segments} segments: max {max_periods} periods for {target_label} reduction"
)

# How many segments can we have with `fixed_periods` periods?
max_segments = find_segments_for_reduction(
    n_timesteps, n_periods=fixed_periods, data_reduction=target_reduction
)
print(
    f"With {fixed_periods} periods: max {max_segments} segments for {target_label} reduction"
)