# LSTM model

In this notebook, we design and tune an Encoder–Decoder LSTM model for 1-day-ahead and 7-day-ahead forecasting. Separate tuning procedures are performed for each of the five time series, identified by the unique IDs F1 through F5.

The notebook is organized into the following sections:

1. **Preliminaries**:
   Data loading, module imports, seeding, and device setup.

2. **Pipeline**:
   Walkthrough of the LSTM training/evaluation pipeline.

3. **Hyperparameter tuning (Optuna)**:
   Start a new study or load and inspect an existing one.

4. **Tuning results**:
   Procedure details and outcome analysis.

5. **Testing**:
   Generate final predictions on the test period using the tuned model.


## Preliminaries

You can run the notebook in two ways:

1. **Google Colab**: place the project folder `heat-forecast` in **MyDrive**. The setup cell below will mount Drive and automatically add `MyDrive/heat-forecast/src` to `sys.path` so `import heat_forecast` works out of the box.

2. **Local machine**:

   * **Installing our package:** from the project root, run `pip install -e .` once (editable install). Then you can open the notebook anywhere and import the package normally.
   * **Alternative:** if you’re running the notebook from `.../heat-forecast/notebooks/` without installing the package, the setup cell will detect `../src` and automatically add it to `sys.path`.

In [None]:
# --- Detect if running on Google Colab & Set base dir ---
# %cd /home/giovanni.lombardi/heat-forecast/notebooks
import subprocess
from pathlib import Path
import sys

def in_colab() -> bool:
    """Return True when the `google.colab` module can be imported, else False."""
    try:
        import google.colab  # type: ignore  # noqa: F401
    except Exception:
        # Any import failure is treated as "not running on Colab".
        return False
    else:
        return True

# Thin wrapper around `python -m pip install -q <pkg>`.
# The function itself performs no "already installed" check; it always invokes pip.
def pip_install(pkg: str):
    """Install `pkg` quietly using pip from the running interpreter.

    Raises:
        subprocess.CalledProcessError: if the pip process exits with a non-zero status.
    """
    command = [sys.executable, "-m", "pip", "install", "-q", pkg]
    subprocess.check_call(command)

# Set base directory and handle environment.
# `added_src` is read later in this cell by the logging code, so it is
# initialised here, before branching: it is then defined on Colab as well as
# on a local machine (previously only the local branch assigned it).
added_src = False

if in_colab():
    # Make sure IPython is modern (avoids the old %autoreload/imp issue if you ever use it),
    # then install the remaining packages listed below.
    for pkg in ["ipython>=8.25", "ipykernel>=6.29", "statsmodels", "statsforecast", "mlforecast"]:
        pip_install(pkg)

    # Mount Google Drive
    from google.colab import drive  # type: ignore
    drive.mount('/content/drive')

    # Set base directory to your Drive project folder
    BASE_DIR = Path('/content/drive/MyDrive/heat-forecast')

    # Add `src/` to sys.path for custom package imports
    SRC_PATH = BASE_DIR / 'src'
    if str(SRC_PATH) not in sys.path:
        sys.path.append(str(SRC_PATH))
        added_src = True

    # Sanity checks (helpful error messages if path is wrong)
    assert SRC_PATH.exists(), f"Expected '{SRC_PATH}' to exist. Fix BASE_DIR."
    pkg_dir = SRC_PATH / "heat_forecast"
    assert pkg_dir.exists(), f"Expected '{pkg_dir}' package directory."
    init_file = pkg_dir / "__init__.py"
    assert init_file.exists(), f"Missing '{init_file}'. Add it so Python treats this as a package."

else:
    # Local: either rely on editable install (pip install -e .) or add src/ when running from repo
    # Assume notebook lives in PROJECT_ROOT/notebooks/
    BASE_DIR = Path.cwd().resolve().parent
    SRC_PATH = BASE_DIR / "src"

    # Only touch sys.path when the package directory is really there and not yet listed.
    if (SRC_PATH / "heat_forecast").exists() and str(SRC_PATH) not in sys.path:
        sys.path.append(str(SRC_PATH))
        added_src = True

# --- Logging setup ---
import logging
from zoneinfo import ZoneInfo
from datetime import datetime

LOG_DIR  = (BASE_DIR / "logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR / "run.log"
PREV_LOG = LOG_DIR / "run.prev.log"

# If there's a previous run.log with content, archive it to run.prev.log
if LOG_FILE.exists() and LOG_FILE.stat().st_size > 0:
    try:
        # Path.replace overwrites an existing run.prev.log in a single step,
        # so no separate unlink is needed.
        LOG_FILE.replace(PREV_LOG)
    except OSError as e:
        # Fall back to truncating if the move fails (e.g., file locked)
        print(f"[warn] Could not archive previous log: {e}. Truncating current run.log.")
        LOG_FILE.write_text("")

# Configure logging: fresh file for this run + echo to notebook/stdout
file_handler   = logging.FileHandler(LOG_FILE, mode="w", encoding="utf-8")
stream_handler = logging.StreamHandler(sys.stdout)

fmt = logging.Formatter("%(asctime)s | %(levelname)s | %(name)s | %(message)s",
                        datefmt="%m-%d %H:%M:%S")
file_handler.setFormatter(fmt)
stream_handler.setFormatter(fmt)

root = logging.getLogger()
root.handlers[:] = [file_handler, stream_handler]  # replace handlers (important in notebooks)
root.setLevel(logging.INFO)

# Use Rome time
ROME_TZ = ZoneInfo("Europe/Rome")


def _rome_converter(*args):
    """Convert a log record's creation time (epoch seconds) to a Rome-time struct_time.

    The function is installed on the Formatter class, so it is invoked as a
    bound method: the timestamp is always the LAST positional argument.
    Using that timestamp (instead of the wall clock at formatting time) keeps
    every handler's output for one record identical.
    """
    return datetime.fromtimestamp(args[-1], tz=ROME_TZ).timetuple()


logging.Formatter.converter = _rome_converter

logging.captureWarnings(True)
logging.info("=== Logging started (fresh current run) ===")
logging.info("Previous run (if any): %s", PREV_LOG if PREV_LOG.exists() else "none")

if added_src:
    logging.info("heat_forecast not installed; added src/ to sys.path")
else:
    logging.info("heat_forecast imported without modifying sys.path (likely installed)")

OPTUNA_DIR = BASE_DIR / "results" / "finetuning" / "lstm"
OPTUNA_DIR.mkdir(parents=True, exist_ok=True)
logging.info("BASE_DIR (make sure it's '*/heat-forecast/', else cd and re-run): %s", BASE_DIR)
logging.info("LOG_DIR: %s", LOG_DIR)
logging.info("OPTUNA_DIR: %s", OPTUNA_DIR)

Ensure [compatibility with Numba](https://numba.readthedocs.io/en/stable/user/installing.html#numba-support-info).

In [None]:
import sys
import numpy
import numba

# Record the interpreter and numeric-stack versions in the run log so they can
# be checked against the Numba support table.
logging.info("=== Current Environment ===")
for _line_fmt, _version in (
    ("Python : %s", sys.version.split()[0]),
    ("NumPy  : %s", numpy.__version__),
    ("Numba  : %s", numba.__version__),
):
    logging.info(_line_fmt, _version)

Imports:

In [None]:
# --- Magic Commands ---
%load_ext autoreload
%autoreload 2

# --- Standard Library ---
import os
os.environ["OPTUNA_LOGGING_DISABLE_DEFAULT_HANDLER"] = "1" # prevent Optuna from attaching its handler
import stat
import logging
from datetime import datetime
from itertools import product
import torch
import optuna
import copy

# --- Third-Party Libraries ---
import numpy as np
import pandas as pd
pd.set_option('display.float_format', '{:.3f}'.format)

import yaml
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from tqdm.notebook import tqdm
from IPython.display import display, HTML

# --- Plotting Configuration ---
# One bulk update instead of item-by-item assignment; `plt.rcParams` and
# `mpl.rcParams` refer to the same settings object.
plt.style.use("seaborn-v0_8")
plt.rcParams.update({
    'font.size': 14,
    'axes.titlesize': 16,
    'axes.labelsize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'figure.titlesize': 18,
    'axes.grid': True,
    'axes.grid.which': 'both',
})

# --- YAML Customization ---
from heat_forecast.utils.yaml import safe_dump_yaml

# --- Safe File Deletion Helper ---
from heat_forecast.utils.fileshandling import remove_tree

# --- Project-Specific Imports ---
from heat_forecast.utils.cv_utils import get_cv_params_for_test
from heat_forecast.pipeline.lstm import (
    NormalizeConfig, ModelConfig, DataConfig, FeatureConfig, TrainConfig, 
    LSTMRunConfig, LSTMPipeline
)
from heat_forecast.utils.optuna import (
    OptunaStudyConfig, run_study, continue_study, describe_suggester, rename_study, clone_filtered_study
)
from heat_forecast.utils.plotting import interactive_plot_cutoff_results

logging.info("All imports successful.")

Load the preprocessed data.

In [None]:
# Both CSVs live in the same folder under BASE_DIR; the 'ds' column is parsed as datetime.
_preprocessed_dir = BASE_DIR / 'data' / 'timeseries_preprocessed'
heat_path = _preprocessed_dir / 'heat.csv'
aux_path = _preprocessed_dir / 'auxiliary.csv'

heat_df = pd.read_csv(heat_path, parse_dates=['ds'])
aux_df = pd.read_csv(aux_path, parse_dates=['ds'])

# Log paths relative to the project root to keep the log short.
logging.info("Loaded heat data: %s", heat_path.relative_to(BASE_DIR))
logging.info("Loaded auxiliary data: %s", aux_path.relative_to(BASE_DIR))

Set device (the LSTM pipeline automatically uses "cuda" if available):

In [None]:
# Prefer the GPU when torch reports CUDA as available; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)
logging.info(f"Using device: {DEVICE}")

Set a seed (set to None to skip):

In [None]:
# Seed shared by later cells: it is passed as `seed=` to LSTMRunConfig and
# OptunaStudyConfig, and as `gen_seed=` to the one-batch overfit sanity check.
SEED = 42
logging.info(f"Using seed: {SEED}")

## Pipeline

### Example of usage of the pipeline

Example configuration:

In [None]:
# Pick one series and keep only its rows in both the target and auxiliary frames
unique_id = 'F1'
heat_id_df = heat_df.loc[heat_df['unique_id'] == unique_id]
aux_id_df = aux_df.loc[aux_df['unique_id'] == unique_id]


# Build each section of the run configuration separately, then assemble them
_example_model_cfg = ModelConfig(
    input_len=72,
    output_len=168,
    hidden_size=8,
    num_layers=1,
    head="linear",
    dropout=0.0,
    use_ar="24h",
)
_example_data_cfg = DataConfig(batch_size=64)
_example_feature_cfg = FeatureConfig(
    exog_vars=("temperature",),
    hour_averages=(),
    endog_hour_lags=(24, 168,),
    use_differences=False,
    use_cold_season=False,
    cold_temp_threshold=15,
    include_exog_lags=False,
)
_example_train_cfg = TrainConfig(
    learning_rate=5e-4,
    n_epochs=20,
    patience=5,
    tf_drop_epochs=4,
    tf_mode="linear",
)
_example_norm_cfg = NormalizeConfig(mode="global")

config = LSTMRunConfig(
    model=_example_model_cfg,
    data=_example_data_cfg,
    features=_example_feature_cfg,
    train=_example_train_cfg,
    norm=_example_norm_cfg,
    seed=SEED,
)

# Instantiate the pipeline for the selected series
pipe = LSTMPipeline(target_df=heat_id_df, config=config, aux_df=aux_id_df)

Generate loaders:

In [None]:
# Choose params for data split
# (both timestamps are hourly and inclusive end points of their split)
end_train = pd.Timestamp("2023-11-01 23:00")
end_val   = pd.Timestamp("2024-04-01 23:00")  # val will start at end_train + 1h automatically

# Build datasets and load
# make_loaders returns a (train, validation) pair of loaders; both are reused
# by the sanity-check and fit cells below.
train_loader, val_loader = pipe.make_loaders(
    end_train=end_train,
    end_val=end_val,
)

Description of the prepared dataset:

In [None]:
# Describe the dataset currently prepared in the pipeline and keep the returned report.
report_data = pipe.describe_dataset()

Description of the prepared model:

In [None]:
# Describe the model currently prepared in the pipeline and keep the returned report.
report_model = pipe.describe_model()

The following method, `sanity_overfit_one_batch`, performs a **sanity check** by trying to overfit the model on a tiny, fixed subset of the training data. It trains for up to `max_epochs` without early stopping, then checks whether the training loss drops by at least a required relative amount (`tol_rel_drop`).

If successful, it signals the pipeline and optimization are working correctly; otherwise, it may indicate bugs or misconfigurations.

In [None]:
# Try to overfit a tiny fixed subset of the training data; the outcome is kept in `check`.
check = pipe.sanity_overfit_one_batch(
    train_loader,
    max_epochs=300,         # bump if needed
    tol_rel_drop=0.8,      # required RELATIVE drop of the loss in ORIGINAL units (MAE by default) to count as "passed"; presumably a fraction (0.8 = 80%) — TODO confirm
    restore_weights=True,   # keep your model “fresh” after the check
    n_samples=16,           # tiny fixed subset
    gen_seed=SEED           # deterministic subset selection
)

This method trains the model under the current configuration.
If a validation loader is provided, it monitors validation loss, applies early stopping, and logs the best epoch reached; otherwise, it trains for the full number of epochs and reports final training loss.
In all cases, it applies the configured learning schedule (teacher forcing, LR drop, gradient clipping), records history and metrics, and returns a results dictionary with training outcomes.

In [None]:
# Train with the loaders built above; a validation loader is supplied, so the
# validation-monitoring path described in the text above applies.
pipe.fit(train_loader, val_loader)

The `predict` method produces forecasts on the original target scale. It can run in two modes:

* Batch mode: pass a `val_loader` to predict multiple windows, returning `unique_id`, `ds`, `cutoff`, and predictions.
* Single-horizon mode: pass a `cutoff` to predict one forecast horizon, returning `unique_id`, `ds`, and predictions.

We use the first mode to compute predictions over several months.

In [None]:
# Ensure this holds: start_test >= end_train + gap + 1h, else will raise
start_test = pd.Timestamp("2024-10-01 00:00:00")
end_test   = pd.Timestamp("2024-12-01 23:00:00")
# NOTE(review): `gap` is computed here but not used anywhere else in this cell;
# it only documents the value that enters the constraint stated above.
gap        = int(pipe.config.data.gap_hours) # default is 0

# When norm.mode="global", statistics are computed from the training slice. 
# If no training range is provided, the pipeline will reuse cached stats (if available). 
# To avoid surprises, it is recommended to always specify the train dates explicitly.
# The first returned loader (training) is not needed here and is discarded.
_, test_loader = pipe.make_loaders(
    end_train=end_train,
    start_val=start_test,
    end_val=end_test,
)

# Compute predictions on the given loader
preds = pipe.predict(val_loader=test_loader, alias="LSTM")
display(preds.head())

In [None]:
# Plot the forecasts in `preds` together with the temperature series.
# NOTE(review): `plotly_forecasts_with_exog` is not imported by any cell shown
# above (the imports cell only brings in `interactive_plot_cutoff_results`
# from heat_forecast.utils.plotting). Unless it is imported elsewhere, this
# call raises NameError — add the import from whichever module defines it.
plotly_forecasts_with_exog(
    target_df=heat_df,
    cv_df=preds,
    aux_df=aux_df,
    exog_vars=["temperature"],
    n_windows=1,
    add_context=True,
    only_aligned_to_day=True,
    figsize=(11, 10),
)

## Hyperparameter tuning (Optuna)

We tune the model using **Optuna**, a Python framework for hyperparameter optimization that supports both **grid search** and more efficient, adaptive methods. With Optuna we can easily run evaluations over a fixed hyperparameter grid, or use its **TPE (Tree-structured Parzen Estimator)** sampler to explore continuous, large and / or conditional search spaces in a data-driven way. The TPE sampler fits separate probabilistic models to "good" and "bad" parameter sets and selects new trials that maximize the ratio of the two (i.e. promising configurations).

For more details on TPE and Optuna, see:

* [Optuna main site](https://optuna.org/)
* [TPESampler documentation](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.TPESampler.html) 
* [This article, for a deeper understanding of the TPESampler](https://arxiv.org/abs/2304.11127)

Overview of the following subsections:
* **"Start a new study"**: how to create and run a new optimization study from scratch.
* **"Review past studies"**: how to load a completed Optuna study in order to delete, copy, continue, or inspect its results in detail.

A detailed review and commentary of the tuning results is provided in the next section.

### Start a new study

As a first step to start a study, select the time series (`unique_id`) you want to optimize. Next, choose a **suggester function** (or define a new one in `heat_forecast/suggesters/lstm.py`) that specifies the hyperparameter search space. Finally, configure the Optuna study by specifying the sampler, pruner, study name, optimization objective, and other parameters that control the search process.

In [None]:
# --- Step 1: Select series ---
unique_id = 'F2'

# --- Step 2: Define search space ---
# Name of the suggester function that defines the search space; it is looked up
# by `describe_suggester` below and passed to `run_study` in the run cell.
suggester_name = "final_v2_F2"

# === Do not edit below ===
# Set path based on unique_id
db_path = OPTUNA_DIR / f"optuna_{unique_id}.db"  # single DB file for each id
storage_url = f"sqlite:///{db_path.as_posix()}"

# Describe suggester
desc = describe_suggester(suggester_name)
logging.info(f"Suggester used:\n{desc}")

In [None]:
# --- Step 3: Configure study ---
# NOTE(review): the study name below ends in "_F1" while the preceding cell
# selects unique_id = 'F2' (and therefore the optuna_F2.db storage). Confirm
# the name is intended before running, otherwise an "F1"-named study is
# created inside the F2 database.
optuna_cfg = OptunaStudyConfig(
    # General
    study_name='final_study_v2_NAR_F1',
    objective="avg_near_best", # objective key (based on val metric); the original note listed only "best" or "last" — TODO confirm the accepted values
    n_trials=200,           # number of trials, use None for grid search
    timeout=None,            # time limit in seconds, None = no limit; NOTE(review): original comment was ambiguous between per-study and per-trial — confirm
    seed=SEED,               # seed for reproducibility of the optuna sampler
    storage=storage_url,     # storage URL for the study 
    pruner="percentile",            # type of pruner: "percentile", "median", "nop"
    sampler="tpe",          # type of sampler: "tpe", "grid"
)

Run the study:

In [None]:
# --- Step 4: Run the study ---
# Safety switch: nothing below executes unless this is set to True.
do_run = False

# === Do not edit below ===
if do_run:
    # Set path based on unique_id
    # (recomputed here so the storage follows the CURRENT value of unique_id,
    # even if it was changed after the configuration cell was run)
    db_path = OPTUNA_DIR / f"optuna_{unique_id}.db"  # single DB file for each id
    storage_url = f"sqlite:///{db_path.as_posix()}"

    # Set base configuration of the pipeline
    # (every section uses its defaults)
    base_cfg = LSTMRunConfig(
        model=ModelConfig(),
        data=DataConfig(),
        features=FeatureConfig(),
        train=TrainConfig(),
        norm=NormalizeConfig(),
    )

    # Train/validation split for tuning.
    # NOTE: these assignments rebind the notebook-level `end_train` / `end_val`
    # that the example cells in the "Pipeline" section also use.
    start_train = None # -> first date available 
    start_val = pd.Timestamp("2023-11-01 00:00") 
    end_train = pd.Timestamp("2023-10-31 23:00")  
    end_val = pd.Timestamp("2024-04-01 23:00")

    # Keep the study config pointing at the storage recomputed above.
    optuna_cfg.storage = storage_url

    study = run_study(
        unique_id,
        heat_df, 
        aux_df, 
        base_cfg,
        start_train=start_train, end_train=end_train, 
        start_val=start_val, end_val=end_val,
        optuna_cfg=optuna_cfg,
        suggest_config_name=suggester_name,
    )
    print("Best value (val loss):", study.best_value)
    print("Best params:", study.best_trial.params)

### Review past studies

In this section, we load an existing study and choose whether to continue, delete, rename, copy, or inspect it.

#### Load and/or modify a study

See available studies for a fixed ID:

In [None]:
# Series whose study database should be inspected
unique_id = 'F5'

# === Do not edit below ===
# One SQLite file per series id
db_path = OPTUNA_DIR / f"optuna_{unique_id}.db"  # single DB file for each id
storage_url = f"sqlite:///{db_path.as_posix()}"

# Fetch every study stored in that database, ordered by name
study_summaries = sorted(
    optuna.study.get_all_study_summaries(storage=storage_url),
    key=lambda s: s.study_name,
)

# Log one line per study
lines = []
for s in study_summaries:
    lines.append(f"Study name: {s.study_name}, trials: {s.n_trials}")
logging.info("Available studies:\n\n" + "\n".join(lines))

Choose a study to continue/delete/review by selecting its name below. Then view a description of the search space used for that study.

In [None]:
study_name = "preliminary_study_v4_F5"

# === Do not edit below ===
# View detailed description of the search space for the study (the suggester documentation)
study = optuna.load_study(study_name=study_name, storage=storage_url)
desc = describe_suggester(study.user_attrs.get("suggest_config_name", ""))

# A non-empty "_parent_study" attribute marks this study as a filtered clone of another one.
parent_study = study.user_attrs.get("_parent_study", "")
txt = f"parent study ({parent_study}):" if parent_study else 'this study:'
logging.info(f"Suggester used in {txt} \n{desc}\n")
if parent_study:
    # Deliberately NOT named `filter`: a notebook-level variable with that name
    # would shadow the builtin filter() for every cell executed afterwards.
    substudy_filter_yaml = yaml.dump(study.user_attrs.get('_substudy_filter', ''), indent=4, sort_keys=False)
    logging.info(f"This substudy was obtained though the filter: \n{substudy_filter_yaml}")

Optionally continue the study:

In [None]:
# Safety switch: the study is only continued when this is set to True.
do_continue = False

# Choose how many trials to add
n_new_trials = 30

# === Do not edit below ===
if do_continue:
    # Continue the loaded study with additional trials
    # (`study_name` / `storage_url` come from the two cells above)
    study = continue_study(
        study_name,
        storage_url,
        n_new_trials=n_new_trials,
        target_df=heat_df,
        aux_df=aux_df,
    )

Optionally create a sub-study selecting trials based on a predicate:

In [None]:
# Safety switch: the sub-study is only created when this is set to True.
do_create = False

if do_create:
    # `new_name` is left empty and `dry_run` is True on purpose, so the cell
    # must be edited before it can create anything.
    # NOTE(review): presumably dry_run=True only previews the result without
    # writing to storage — confirm against clone_filtered_study.
    study = clone_filtered_study(
        storage_url=storage_url,
        src_name=study_name,
        new_name="",
        save_to_storage=True,
        # Keep trials that have a learning rate and whose value exceeds 2.2e-4.
        predicate=lambda t: (
            t.params.get("train.learning_rate") is not None
            and float(t.params["train.learning_rate"]) > 2.2e-4
        ),
        dry_run=True
    )

Optionally delete the study:

In [None]:
# Safety switch: deletion only runs when this is set to True.
do_delete = False

if do_delete:
    # Second safeguard: the name is an empty string until edited by hand, so
    # enabling `do_delete` alone does not target the currently loaded study.
    optuna.delete_study(
        study_name="", # change to `study_name` if you are really sure you want to proceed 
        storage=storage_url,  
    )

Optionally rename the study:

In [None]:
# Safety switch: renaming only runs when this is set to True.
do_rename = False

if do_rename:
    # Both names are empty placeholders and must be filled in before use.
    # NOTE(review): presumably dry_run=True previews the rename without
    # modifying storage, and keep_old=False drops the old study once the
    # rename is real — confirm against rename_study.
    study = rename_study(
        old_name="",
        new_name="",
        storage_url=storage_url,
        keep_old=False,
        dry_run=True
    )

#### View study results

In [None]:
from heat_forecast.utils.optuna import (
    trials_df, trials_df_for_display, summarize_params_coverage, 
    plot_intermediate_values, plot_optimization_history
)

Optionally filter based on a predicate:

In [None]:
do_filter = False


def _param_as_float(trial, name):
    """Return `trial.params[name]` as a float, or NaN if the trial lacks that parameter.

    Every comparison against NaN is False, so a trial without the parameter is
    simply excluded by a range check instead of raising TypeError (which is
    what `float(None)` or `None < x` would do).
    """
    value = trial.params.get(name)
    return float("nan") if value is None else float(value)


if do_filter:
    # Keep only the trials accepted by the predicate; the result is used by all
    # the result-viewing cells below in place of the full study.
    study_filtered = clone_filtered_study(
        src_study=study,
        predicate=lambda t: (
            t is not None
            #and t.params.get("train.drop_epoch") == 7
            and 7e-4 < _param_as_float(t, "train.learning_rate") < 1.5e-3
            #and t.user_attrs.get("n_params") is not None
            #and 10000 < float(t.user_attrs["n_params"]) < 200000
            and _param_as_float(t, "model.dropout") < .08
            #and t.params.get("model.hidden_size") > 48
            #and not ((t.params.get("model.hidden_size") == 8)
            #or (t.params.get("model.hidden_size") == 40 and t.params.get("model.num_layers") == 1)
            #or (t.params.get("model.hidden_size") == 32 and t.params.get("model.num_layers") == 2))
            #and t.params.get("model.hidden_size") == 112
            #and t.params.get("model.num_layers") == 2
            #and t.params.get("train.learning_rate") < 1.5e-3
            #and t.params.get("model.dropout") < .05
            #and t.params.get("model.dropout") > 0.1
            #and t.params.get("model.hidden_size") <= 12
            #and t.params.get("model.num_layers") == 1
        ),
    )
else:
    # No filtering requested: downstream cells work on the loaded study itself.
    study_filtered = study

View best trials in the study:

In [None]:
# Tabulate every trial of the (possibly filtered) study
df, val_name = trials_df(study_filtered)

# === Do not edit below ===
# Count trials per state and log them on a single line
_state_labels = ("complete", "pruned", "fail", "running", "waiting")
_state_parts = [f"{label}={sum(df['state'] == label.upper())}" for label in _state_labels]
logging.info(f"Trials total={len(df)}, " + ", ".join(_state_parts))

# Show the first 40 rows of the display table without pandas truncating it
with pd.option_context("display.max_columns", None, "display.max_rows", None):
    _display_table = trials_df_for_display(df, val_name)
    display(_display_table.head(40))

View values distribution:

In [None]:
# Objective values of all trials that have one
data = df[val_name].dropna()

# Summary statistics, in display order
_q1 = data.quantile(0.25)
_q3 = data.quantile(0.75)
stats = {
    "Min": data.min(),
    "Q1 (25%)": _q1,
    "Median (Q2)": data.median(),
    "Mean": data.mean(),
    "Q3 (75%)": _q3,
    "Max": data.max(),
    "Std Dev": data.std(),
    "Count": data.count(),
}

# One-row table labelled with the objective name
stats_df = pd.DataFrame(stats, index=[val_name])
display(stats_df)

# Histogram of the objective values
plt.figure(figsize=(10, 3))
plt.hist(data, bins=30)
plt.xlabel(val_name)
plt.ylabel("Frequency")
plt.title(f"Distribution of {val_name}", fontsize=14)

plt.show()


View optimization history:

In [None]:
# Plot the optimization history of the (possibly filtered) study.
plot_optimization_history(study_filtered)

View the intermediate validation losses for each trial. The function also supports filtering curves based on specific trial parameters or attributes.

In [None]:
def _param_as_float(trial, name):
    """Return `trial.params[name]` as a float, or NaN if the trial lacks that parameter.

    Every comparison against NaN is False, so a trial without the parameter is
    excluded by the predicate instead of raising TypeError on `None > x`.
    """
    value = trial.params.get(name)
    return float("nan") if value is None else float(value)


# Plot the per-epoch validation losses of the trials accepted by `predicate`.
plot_intermediate_values(
    study_filtered, 
    # --- Apply custom filtering here if needed ---
    #include_params={'features.lags_key': 'none'}, 
    predicate=lambda t: (
        t is not None
        and _param_as_float(t, "train.learning_rate") > 3e-4
        #and t.params.get("train.learning_rate") < 1e-3
        and _param_as_float(t, "model.dropout") < .15
        #and t.params.get("model.dropout") > 0.01
        #and (t.params.get("model.hidden_size") == 12)
        #and t.params.get("model.num_layers") == 2
        #and t.params.get("train.use_weight_decay") == False
        #and float(t.params.get("train.learning_rate")) > 7e-4
        #and float(t.params.get("train.learning_rate")) < 1.5e-3
        #and t.user_attrs.get("n_params") is not None
        #and 10000 < float(t.user_attrs["n_params"]) < 200000
        #and t.params.get("model.dropout") < .2
    ),
    dim_excluded=False,
    dim_factor=0.1
)

Analysis of coverage of the parameters space:

In [None]:
# === Do not edit below ===
# Use `study_filtered`, the same study `df` was built from, so the coverage
# summary stays consistent with the table when a filter is active
# (without a filter, `study_filtered` is the loaded study itself).
num_sty, cat_sty = summarize_params_coverage(study_filtered, df, val_name)

# Show more decimals than the notebook-wide float format while displaying the tables.
with pd.option_context("display.float_format", lambda v: f"{v:,.4f}"):
    if num_sty:
        logging.info("Coverage summary of numeric parameters:")
        display(num_sty)
    else:
        logging.info("No numeric parameters found for the study.")
    if cat_sty:
        logging.info("Coverage summary of categorical parameters:")
        display(cat_sty)
    else:
        logging.info("No categorical parameters found for the study.")

Counts of best epochs:

In [None]:
from matplotlib.ticker import MaxNLocator

# === Do not edit below ===
# Best epoch recorded for each COMPLETE trial
best_epochs = df[df['state'] == 'COMPLETE']['user_attrs_best_epoch']

# Non-numeric / missing entries become NaN and are dropped before the int cast
s = pd.to_numeric(best_epochs, errors="coerce").dropna().astype(int)

# Test for emptiness explicitly: `s.any()` would also be False when every best
# epoch equals 0, wrongly reporting that no best epoch exists.
if s.empty:
    logging.info("No best_epoch found.")
else:
    # count each integer and include missing integers with count=0
    lo, hi = int(s.min()), int(s.max())
    counts = s.value_counts().sort_index()
    counts = counts.reindex(range(lo, hi + 1), fill_value=0)

    # plot
    fig, ax = plt.subplots(figsize=(9, 3.5), constrained_layout=True)
    ax.bar(counts.index, counts.values, width=0.8)
    ax.set_xlabel("Best epoch")
    ax.set_ylabel("Count of trials")
    ax.set_title("Best epoch counts")
    ax.set_xticks(counts.index)
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))  # integer ticks only

    # log best
    logging.info(f"Best epoch by median: {s.median() :.0f}")

    plt.show()


#### View results marginalized on single hyperparameters

In [None]:
from heat_forecast.utils.optuna import (
    plot_marginals_1d, plot_param_importances, display_marginals_1d
)
import optuna.importance as imp

View fANOVA importances:

In [None]:
# === Do not edit below ===
# Parameter importances of the (possibly filtered) study, largest first
imps = pd.Series(
    imp.get_param_importances(study_filtered),
    dtype=float,
).sort_values(ascending=False)

# Plot only when at least one importance is non-zero
if imps.any():
    fig, ax = plot_param_importances(imps)

Compute and plot 1D marginal distributions. Many of the following functions also accept arguments such as `non_params_to_allow`, which lets you include selected trial user attributes in the marginal computations (treating them as parameters), and `objective`, which allows you to replace the objective value with any other numeric user attribute.

In [None]:
# 1D marginals of the objective for every parameter, plus the
# `user_attrs_n_params` trial attribute treated as if it were a parameter.
plot_marginals_1d(
    df, val_name,
    bins_numeric=7,
    non_params_to_allow=["user_attrs_n_params"],
    #objective="user_attrs_avg_near_best"
)

Plot only a subset of marginals (e.g. most important):

In [None]:
# `imps` is sorted by decreasing importance, so its first index entry is the
# most important parameter; raise the slice bound to plot more of them.
most_imp_params = list(imps.index[:1])

plot_marginals_1d(
    df,
    val_name,
    params=most_imp_params,
    bins_numeric=12,
    #non_params_to_allow=["user_attrs_n_params"]
)

Detailed summaries for each parameter:

In [None]:
top_k = 20          # Will show the fraction of trials for each parameter choice that belongs to the top_k trials
top_frac = 0.20     # Will show the fraction of trials for each parameter choice that belongs to the top_frac trials
# NOTE: `params` is not passed to the call below (the `params=` argument is
# commented out), so this assignment has no effect within this cell.
params = None

# Per-parameter summary tables; `user_attrs_n_params` is binned with the
# custom edges given below instead of the quantile binning used elsewhere.
tbls_sty = display_marginals_1d(
    df, val_name,
    #params=[],
    non_params_to_allow=["user_attrs_n_params"],
    #objective="user_attrs_avg_near_best",
    top_k=top_k,
    top_frac=top_frac,
    binning_numeric="quantile",
    custom_edges_dict={"user_attrs_n_params": [40000, 110000, 160000]},
    bins_numeric=4,
)

In [None]:
from typing import Callable

def bootstrap_region_ci(
    df,
    val_name,
    region_mask,
    stat: Callable[[np.ndarray], float] = np.mean,
    n_boot: int = 2000,
    objective: str | None = None,
    difference: bool = True,
    seed: int = 0,
    dropna: bool = True,
):
    """
    Bootstrap CI for a region's mean (difference=False) or for the difference of means
    between region and its complement (difference=True), using COMPLETE trials only.

    Returns:
        point_estimate: float  (mean or mean difference)
        ci: tuple (low, high)
        sizes: dict {'n_region': int, 'n_complement': int}
    """
    if objective is None:
        objective = val_name

    # Split region vs complement and filter COMPLETE trials
    dfc_reg = df[region_mask & (df["state"] == "COMPLETE") & df[objective].notna()].copy()
    dfc_compl = df[~region_mask & (df["state"] == "COMPLETE") & df[objective].notna()].copy()

    # Extract values
    x = dfc_reg[objective].to_numpy(dtype=float)
    y = dfc_compl[objective].to_numpy(dtype=float)

    if dropna:
        x = x[np.isfinite(x)]
        y = y[np.isfinite(y)]

    n_reg, n_compl = len(x), len(y)

    # Basic sanity checks
    if n_reg == 0:
        raise ValueError("No COMPLETE rows in the selected region.")
    if difference and n_compl == 0:
        raise ValueError("No COMPLETE rows in the complement region for difference CIs.")

    rng = np.random.default_rng(seed)

    # Compute bootstrap replicates
    boots = np.empty(n_boot, dtype=float)
    if difference:
        # Δ = mean(region) - mean(complement)
        for b in range(n_boot):
            bx = stat(rng.choice(x, size=n_reg, replace=True))
            by = stat(rng.choice(y, size=n_compl, replace=True))
            boots[b] = bx - by
        point = stat(x) - stat(y)
        label_suffix = " difference"
    else:
        for b in range(n_boot):
            bx = stat(rng.choice(x, size=n_reg, replace=True))
            boots[b] = bx
        point = stat(x)
        label_suffix = ""

    # Percentile CI
    low, high = np.percentile(boots, [2.5, 97.5])

    logging.info(
        f"Bootstrap samples ({val_name}{label_suffix}):\n"
        f"point estimate                ={point:.2f}{f" (SD={x.std(ddof=1):.2f})" if not difference else ''},\n"
        f"95% CI                        =[{low:.2f}, {high:.2f}],\n"
        f"point +- symmetric half-width ={point:.2f} +- {((high - low) / 2):.2f},\n"
        f"actual half-widths            ={(high - point):.2f} / {(point - low):.2f},\n"
        f"n_region                      ={n_reg},\n"
        f"n_complement                  ={n_compl},\n"
        f"boot_mean                     ={boots.mean():.2f}, \n"
        f"boot_std                      ={boots.std(ddof=1):.2f}\n"
    )

    return point, (low, high), {"n_region": n_reg, "n_complement": n_compl}


# bootstrap_region_ci returns (point, (low, high), sizes) where `sizes` is a
# dict; unpacking that dict as a tuple would bind its KEYS (the strings
# "n_region" / "n_complement"), so the counts are read by key instead.
bm, (blo, bhi), sizes = bootstrap_region_ci(
    df, val_name,
    region_mask=(df['params_model.use_ar_prev']==True),
    stat=np.mean,
    n_boot=10000,
    objective=val_name,
    difference=True,
)
n_reg = sizes["n_region"]
n_tot = n_reg + sizes["n_complement"]  # total number of trials used (region + complement)


#### Study interactions between hyperparameters

In [None]:
from optuna.visualization import plot_parallel_coordinate
from heat_forecast.utils.optuna import marginal_2d

Show the parallel coordinate plot (mostly useful with continuous parameters):

In [None]:
top_frac = 0.2   # fraction of trials to highlight (0.2 = the 20% with the lowest objective)
params = None    # choose params to plot
# params = imps[:6].index.tolist()  # alternative: pick only the most important

# === Do not edit below ===
vals = df[val_name].dropna().to_list()
if not vals:
    # Nothing to rank or colour without objective values
    logging.info("No objective values available; skipping the parallel coordinate plot.")
else:
    # Objective value below which a trial belongs to the highlighted fraction
    th = np.quantile(vals, top_frac)

    # Build the figure once (it was previously built twice and the first copy discarded)
    fig = plot_parallel_coordinate(study_filtered, params=params)

    # The first dimension of the trace is constrained to [min, th], which
    # highlights only the trials in the selected fraction
    fig.data[0].dimensions[0].constraintrange = [min(vals), th]
    fig.update_coloraxes(cmin=min(vals), cmax=max(vals))

    # Embed plotly.js inline so the figure renders without external resources
    html = fig.to_html(include_plotlyjs="inline", full_html=False)
    display(HTML(html))

Below we can visualize pairwise relationships between parameters or user-defined attributes by creating 2D marginal plots.

In [None]:
# List the distinct trial states present in the trials table.
df['state'].unique()

In [None]:
from heat_forecast.utils.optuna import plot_marginals_2d

# 2D marginals of the objective over pairs of parameters; the returned
# `pivots` mapping is reused by the next cell to display the tables.
# `user_attrs_n_params` is allowed as a pseudo-parameter and binned with the
# custom edges below on either axis.
fig, pivots = plot_marginals_2d(
    df, val_name,
    #objective="user_attrs_avg_near_best",
    #params=["model.hidden_size", "model.num_layers", "train.drop_epoch", "model.dropout"], #imps.index[:3].tolist(),
    #params=["model.dropout", "train.learning_rate"], #imps.index[:3].tolist(),
    as_first="model.input_len", #imps.index[0],
    statistic="mean",
    binning="quantile",
    show_text=True,
    bins_a=4,
    bins_b=4,
    non_params_to_allow=["user_attrs_n_params"],
    custom_edges_dict_a={"user_attrs_n_params": [110000, 160000]},
    custom_edges_dict_b={"user_attrs_n_params": [110000, 160000]},
)
fig.show()


Display the tables plotted above, or choose a different statistic.

In [None]:
statistic = "mean"
n_max = 10 # max number of tables to display

# === Do not edit below ===
# Show at most `n_max` pivot tables, each looked up by the chosen statistic
for key, pivs in list(pivots.items())[:n_max]:
    logging.info(f"2D marginal for {key}, statistic = '{statistic}':")
    piv = pivs.get(statistic)
    if piv is None:
        display("(Not found)")
    else:
        display(piv)

## Tuning results

In [None]:
# Settings shared by all final configurations. `final_cfgs` below deep-copies
# this object and then overrides the per-series / per-horizon fields, so the
# template itself is never mutated.
base_final_cfgs = LSTMRunConfig(
    model=ModelConfig(
        input_len=72, 
        head="linear",
    ),
    data=DataConfig(
        batch_size=64,
    ),
    features=FeatureConfig(
        include_exog_lags=True
    ),
    train=TrainConfig(
        grad_clip_max_norm=10.0,
        use_lr_drop=True,
        lr_drop_factor=0.3,
        tf_drop_epochs=4,
        lr_drop_epoch=4,
        tf_mode="linear",
    ),
    norm=NormalizeConfig(mode="global"),
)

def final_cfgs(unique_id: str, horizon_type: str) -> "LSTMRunConfig":
    """Return the final (tuned) run configuration for one series and horizon.

    The result is an independent deep copy of ``base_final_cfgs`` with the
    series-specific and horizon-specific values applied, so callers may mutate
    it freely without affecting the shared base configuration.

    Args:
        unique_id: Series identifier, one of ``'F1'`` ... ``'F5'``.
        horizon_type: Forecast horizon, ``'day'`` or ``'week'``.

    Returns:
        The configured copy of ``base_final_cfgs``.

    Raises:
        ValueError: If ``unique_id`` or ``horizon_type`` is not one of the
            supported values.
    """
    if unique_id not in ('F1', 'F2', 'F3', 'F4', 'F5') or horizon_type not in ('day', 'week'):
        raise ValueError("Invalid unique_id or horizon_type.")

    is_week = horizon_type == 'week'

    def pick(week_value, day_value):
        # Select the value tuned for the requested horizon.
        return week_value if is_week else day_value

    # Per-series overrides, grouped by the config section they are written to.
    per_series = {
        'F1': {
            'model': {'hidden_size': 64, 'num_layers': 2, 'dropout': 0.0},
            'train': {'learning_rate': pick(8e-4, 1e-3), 'n_epochs': pick(7, 12)},
        },
        'F2': {
            'model': {'hidden_size': 112, 'num_layers': 1, 'dropout': pick(0.05, 0.0)},
            'train': {'learning_rate': 8e-4, 'n_epochs': 7},
        },
        'F3': {
            'model': {'hidden_size': 32, 'num_layers': 1, 'dropout': 0.25},
            'train': {'learning_rate': pick(8e-4, 1e-3), 'n_epochs': pick(7, 12)},
        },
        'F4': {
            'model': {'hidden_size': 16, 'num_layers': 1, 'dropout': pick(0.1, 0.0)},
            'train': {
                # F4 is the only series that overrides the shared schedule settings.
                'use_lr_drop': False,
                'tf_drop_epochs': 8,
                'learning_rate': pick(4e-4, 1e-3),
                'n_epochs': pick(4, 10),
            },
        },
        'F5': {
            'model': {'hidden_size': 8, 'num_layers': 1, 'dropout': 0.0},
            'train': {'learning_rate': pick(1.7e-3, 7e-4), 'n_epochs': pick(3, 16)},
        },
    }

    cfg = copy.deepcopy(base_final_cfgs)
    for section_name, values in per_series[unique_id].items():
        section = getattr(cfg, section_name)
        for field_name, value in values.items():
            setattr(section, field_name, value)

    # Identical for every series: "24h" for the weekly horizon, "none" for the daily one.
    cfg.model.use_ar = pick("24h", "none")
    return cfg

## Testing

Code for final testing with the tuned models:

In [None]:
# Final evaluation of the tuned models: for every (series, horizon) pair in `grid`,
# run cross-validation over the test period and save the forecasts plus run metadata
# under results/test/lstm/<run_id>/. Set `do_test = True` to actually run it.
do_test = False
grid = list(product(['F5'], ['day']))


def _discard_run_dir(run_dir):
    """Remove a partially written run directory and log what was removed."""
    run_dir = Path(run_dir)
    remove_tree(run_dir, require_within=BASE_DIR)
    inside_base = BASE_DIR in run_dir.resolve().parents
    logging.info("✓ Removed %s", run_dir.relative_to(BASE_DIR) if inside_base else run_dir)


if do_test:
    # `series_id` rather than `id`, so the builtin `id` is not shadowed in the notebook namespace.
    for series_id, horizon_type in tqdm(grid, desc="Test", leave=True):

        metadata = {}

        # --- Create directory for test results ---
        timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
        run_id = f"{series_id}_{horizon_type}_test_lstm_{timestamp}"
        path = BASE_DIR / "results" / "test" / "lstm" / run_id
        metadata['run_id'] = run_id

        # Create the directory BEFORE entering the try block: if it already exists,
        # FileExistsError must propagate without triggering the cleanup below, which
        # would otherwise delete the results of an earlier run.
        path.mkdir(parents=True, exist_ok=False)

        try:
            logging.info("Created directory for test results: %s", path.relative_to(BASE_DIR))

            # --- Set params for cv ---
            out = get_cv_params_for_test(horizon_type)
            metadata['for_cv'] = {
                'step_size': out['step_size'],
                'test_hours': out['test_hours'],
                'end_test_cv': str(out['end_test_actual']),
                'n_windows': out['n_windows'],
                'refit': out['refit'],
                'n_fits': out['n_fits'],
            }

            # ------------- Run cross-validation with the optimal parameters -------------
            # Restrict the data to the current series and build the pipeline
            heat_id_df = heat_df[heat_df['unique_id'] == series_id]
            aux_id_df = aux_df[aux_df['unique_id'] == series_id]
            config = final_cfgs(series_id, horizon_type)
            pipe = LSTMPipeline(
                target_df=heat_id_df,
                config=config,
                aux_df=aux_id_df,
            )
            metadata['model_config'] = config.to_dict()
            metadata['device'] = DEVICE.type

            # Run cv (timed, to report the average wall-clock time per fit)
            t0 = pd.Timestamp.now()
            cv_df = pipe.cross_validation(
                test_size=out['test_hours'],  # Test size in hours
                end_test=out['end_test_actual'],  # End of the test period
                step_size=out['step_size'],   # Step size in hours
                refit=out['refit'],  # Refit policy as returned by get_cv_params_for_test
                verbose=True,
            )
            t1 = pd.Timestamp.now()

            avg_elapsed = (t1 - t0).total_seconds() / out['n_fits']
            metadata['avg_el_per_fit'] = avg_elapsed

            cv_df.to_parquet(path / "cv_df.parquet", compression="snappy")

            # Written as UTF-8 to match the encoding used when the metadata is read back.
            metadata_path = path / 'metadata.yaml'
            with open(metadata_path, 'w', encoding="utf-8") as f:
                safe_dump_yaml(
                    metadata,
                    f,
                    indent=4,
                )

            logging.info("✓ Artifacts saved successfully for id=%s, horizon=%s.", series_id, horizon_type)

        except KeyboardInterrupt:
            # Not an Exception subclass, so it needs its own handler to clean up too.
            logging.warning("✗ Interrupted; cleaning up.")
            _discard_run_dir(path)
            raise
        except Exception:
            logging.exception("✗ Error during test for id=%s, horizon=%s; cleaning up.", series_id, horizon_type)
            _discard_run_dir(path)
            raise

    logging.info("✓ Test completed.")

In [None]:
# Pick a run_id to analyze
# NOTE(review): this run_id and the "sarimax" folder below point at SARIMAX results, while the
# test cell of this notebook writes to results/test/lstm/<id>_<horizon>_test_lstm_<timestamp>.
# This looks like a copy-paste leftover — confirm which results are meant to be inspected here.
run_id = "F5_day_test_sarimax_20251007T100300"

# === Do not edit below ===
# --- Load data ---
# Forecasts (parquet) and run metadata (YAML) saved for the chosen run.
path = BASE_DIR / "results" / "test" / "sarimax" / run_id
cv_df = pd.read_parquet(path / "cv_df.parquet")
with open(path / "metadata.yaml", "r", encoding="utf-8") as f:
    metadata = yaml.safe_load(f) 

# --- check cv_df ---
# Check the loaded forecasts against the saved metadata; the return value is discarded.
# (Presumably this raises or logs on inconsistencies — confirm in `sanity_cv_df`.)
from heat_forecast.utils.cv_utils import sanity_cv_df
_ = sanity_cv_df(cv_df, metadata, positive_forecasts=True)

# --- visual check ---
interactive_plot_cutoff_results(
    target_df=heat_df,
    cv_df=cv_df,
    add_context=False,
    figsize=(11, 3)
)

# Pick a run_id to analyze
# NOTE(review): this run_id and the "sarimax" folder below point at SARIMAX results, while the
# test cell of this notebook writes to results/test/lstm/<id>_<horizon>_test_lstm_<timestamp>.
# This looks like a copy-paste leftover — confirm which results are meant to be inspected here.
run_id = "F5_day_test_sarimax_alternative_20251007T033353"

# === Do not edit below ===
# --- Load data ---
# Forecasts (parquet) and run metadata (YAML) saved for the chosen run.
path = BASE_DIR / "results" / "test" / "sarimax" / run_id
cv_df = pd.read_parquet(path / "cv_df.parquet")
with open(path / "metadata.yaml", "r", encoding="utf-8") as f:
    metadata = yaml.safe_load(f) 

# --- check cv_df ---
# Check the loaded forecasts against the saved metadata; the return value is discarded.
# (Presumably this raises or logs on inconsistencies — confirm in `sanity_cv_df`.)
from heat_forecast.utils.cv_utils import sanity_cv_df
_ = sanity_cv_df(cv_df, metadata, positive_forecasts=True)

# --- visual check ---
interactive_plot_cutoff_results(
    target_df=heat_df,
    cv_df=cv_df,
    add_context=False,
    figsize=(11, 3)
)