In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import TransformerModel
from darts.metrics import smape
from darts.dataprocessing.transformers import Scaler
import torch
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.callbacks import Callback, EarlyStopping
import optuna
import os
import json

# Visualization settings
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (12, 6)
torch.set_float32_matmul_precision('medium')

  __import__("pkg_resources").declare_namespace(__name__)  # type: ignore


In [2]:
target_columns = [
    'Temperature','Precipitation_accumulated','Humidity',
    'Soil_Moisture', 'Soil_Temperature', 
]

DATA_FILE_PATH = "../data/ground_station_clean.csv"
df = pd.read_csv(DATA_FILE_PATH)

In [3]:
encoders = {
    "cyclic": {
        "past": ["month", "dayofyear", "day", "hour", "minute"],
        "future": ["month", "dayofyear", "day", "hour", "minute"]
    },
    "transformer": Scaler(),
    "datetime_attribute": {
        "past": ["year"],
        "future": ["year"]
    }
}

In [4]:
class PatchedPruningCallback(PyTorchLightningPruningCallback, Callback):
    pass

# Create directory to save iteration results
results_output_dir = "optuna_iteration_metrics"
os.makedirs(results_output_dir, exist_ok=True)

In [5]:
from sklearn.model_selection import TimeSeriesSplit

def objective(trial):
    input_chunk_length = trial.suggest_int("input_chunk_length", 24, 168, step=12)
    dropout = trial.suggest_float("dropout", 0.1, 0.5, step=0.05)
    batch_size = trial.suggest_int("batch_size", 64, 256, step=8)
    lr = trial.suggest_float("lr", 1e-6, 5e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)


    nhead = trial.suggest_int('nhead', 2, 8)
    d_model = trial.suggest_int('d_model', 32, 256, step=8)
    if d_model % nhead != 0:
        raise optuna.exceptions.TrialPruned(f"d_model ({d_model}) must be divisible by nhead ({nhead})")
    num_encoder_layers = trial.suggest_int('num_encoder_layers', 1, 6)
    num_decoder_layers = trial.suggest_int('num_decoder_layers', 1, 6)
    activation = trial.suggest_categorical('activation', ["GLU", "Bilinear", "ReGLU", "GEGLU", "SwiGLU", "ReLU", "GELU"])
    
    early_stopper = EarlyStopping("val_loss", min_delta=0.0005, patience=15, verbose=True)
    prunner = PatchedPruningCallback(trial, monitor="val_loss")
    pl_trainer_kwargs = {
        "accelerator": "auto",
        "callbacks": [early_stopper, prunner],
    }

    n_splits = 3
    tscv = TimeSeriesSplit(n_splits=n_splits)
    overall_smape_list = []
    fold = 0

    for train_index, val_index in tscv.split(df):
        print(f"\nFold {fold+1}/{n_splits}")
        train_df_fold = df.iloc[train_index]
        val_df_fold = df.iloc[val_index]
        train_fold = TimeSeries.from_dataframe(train_df_fold, time_col="Timestamp", value_cols=target_columns, freq='1h')
        val_fold = TimeSeries.from_dataframe(val_df_fold, time_col="Timestamp", value_cols=target_columns, freq='1h')

        scaler = Scaler()
        scaler = scaler.fit(train_fold)
        train_scaled = scaler.transform(train_fold)
        val_scaled = scaler.transform(val_fold)

        opt_kwargs = {"lr": lr, "weight_decay": weight_decay}

        model = TransformerModel(
            model_name=f"model_{fold+1}",
            input_chunk_length=input_chunk_length,
            output_chunk_length=24,
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            num_decoder_layers=num_decoder_layers,
            activation=activation,
            n_epochs=30,
            batch_size=batch_size,
            dropout=dropout,
            add_encoders=encoders,
            pl_trainer_kwargs=pl_trainer_kwargs,
            loss_fn=torch.nn.L1Loss(),
            optimizer_cls=torch.optim.Adam,
            lr_scheduler_cls=torch.optim.lr_scheduler.ReduceLROnPlateau,
            lr_scheduler_kwargs={"mode": "min", "factor": 0.3, "patience": 7, "min_lr": 1e-7},
            save_checkpoints=True,
            force_reset=True,
            random_state=42,
            optimizer_kwargs=opt_kwargs,
        )
        model.fit(
            series=train_scaled,
            val_series=val_scaled,
            verbose=False,
            dataloader_kwargs={"num_workers": 11},
        )

        try:
            loaded_model = model.load_from_checkpoint(f"model_{fold+1}", best=True)
            print(f"Model loaded from checkpoint for trial {trial.number}, fold {fold}")
        except FileNotFoundError:
            print(f"Checkpoint not found. Using the in-memory trained model.")
            loaded_model = model

        forecasts = loaded_model.historical_forecasts(
            val_scaled,
            forecast_horizon=24,
            stride=1,
            retrain=False,
            verbose=False
        )

        overall_smape_val = smape(val_scaled, forecasts)
        print(f"SMAPE fold {fold}: {overall_smape_val}")
        overall_smape_list.append(overall_smape_val)
        fold += 1

    mean_smape = np.mean(overall_smape_list)
    print(f"Média dos SMAPE nos folds: {mean_smape}")

    trial_dict = {
        "trial_number": trial.number,
        "fold_smape": overall_smape_list,
        "mean_smape": mean_smape,
        "hyperparameters": trial.params
    }
    json_path = os.path.join(results_output_dir, f"trial_{trial.number}.json")
    with open(json_path, 'w') as f:
        json.dump(trial_dict, f, indent=4)
    print(f"Results of trial {trial.number} saved in {json_path}")
    return mean_smape if not np.isnan(mean_smape) else float("inf")

In [6]:
def print_callback(study, trial):
    print(f"Current value: {trial.value}, Current params: {trial.params}")
    print(f"Best value: {study.best_value}, Best params: {study.best_trial.params}")

In [8]:
study = optuna.create_study(direction="minimize")
num_hyperparams = 11
n_trials = 7 * num_hyperparams
study.optimize(objective, n_trials=n_trials, callbacks=[print_callback])

[I 2025-06-15 12:23:01,788] A new study created in memory with name: no-name-6170d51e-e95e-4558-a809-ae236d85336b
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Fold 1/3


Metric val_loss improved. New best score: 12.912
Metric val_loss improved by 0.090 >= min_delta = 0.0005. New best score: 12.822
Metric val_loss improved by 0.056 >= min_delta = 0.0005. New best score: 12.766
Metric val_loss improved by 0.026 >= min_delta = 0.0005. New best score: 12.740
Metric val_loss improved by 0.022 >= min_delta = 0.0005. New best score: 12.718
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.705
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.695
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.685
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.677
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 12.667
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.657
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.654
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 0, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 123.91005367367643

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 12.344 >= min_delta = 0.0005. New best score: 0.307
Metric val_loss improved by 0.138 >= min_delta = 0.0005. New best score: 0.168
Metric val_loss improved by 0.053 >= min_delta = 0.0005. New best score: 0.116
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.114
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.111
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.109
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.106
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.104
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.101
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 0.094
Metric val_loss improved by 0.005 >= min_delta = 0.

Model loaded from checkpoint for trial 0, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 1: 48.977387416544694

Fold 3/3


Monitored metric val_loss did not improve in the last 16 records. Best score: 0.071. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 0, fold 2


[I 2025-06-15 12:25:37,609] Trial 0 finished with value: 76.76633897565473 and parameters: {'input_chunk_length': 108, 'dropout': 0.4, 'batch_size': 184, 'lr': 0.00040659498654104626, 'weight_decay': 9.932333991688388e-05, 'nhead': 4, 'd_model': 56, 'num_encoder_layers': 2, 'num_decoder_layers': 2, 'activation': 'ReGLU'}. Best is trial 0 with value: 76.76633897565473.
[I 2025-06-15 12:25:37,610] Trial 1 pruned. d_model (152) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 57.4115758367431
Média dos SMAPE nos folds: 76.76633897565473
Results of trial 0 saved in optuna_iteration_metrics/trial_0.json
Current value: 76.76633897565473, Current params: {'input_chunk_length': 108, 'dropout': 0.4, 'batch_size': 184, 'lr': 0.00040659498654104626, 'weight_decay': 9.932333991688388e-05, 'nhead': 4, 'd_model': 56, 'num_encoder_layers': 2, 'num_decoder_layers': 2, 'activation': 'ReGLU'}
Best value: 76.76633897565473, Best params: {'input_chunk_length': 108, 'dropout': 0.4, 'batch_size': 184, 'lr': 0.00040659498654104626, 'weight_decay': 9.932333991688388e-05, 'nhead': 4, 'd_model': 56, 'num_encoder_layers': 2, 'num_decoder_layers': 2, 'activation': 'ReGLU'}
Current value: None, Current params: {'input_chunk_length': 72, 'dropout': 0.2, 'batch_size': 96, 'lr': 4.453353599762701e-06, 'weight_decay': 4.072614982260518e-05, 'nhead': 3, 'd_model': 152}
Best value: 76.76633897565473, Best params: {'input_chunk_length': 108, 'dropout': 0.4, 'batch_size': 184,

Metric val_loss improved. New best score: 12.674
Metric val_loss improved by 0.042 >= min_delta = 0.0005. New best score: 12.632
Metric val_loss improved by 0.036 >= min_delta = 0.0005. New best score: 12.596
Metric val_loss improved by 0.027 >= min_delta = 0.0005. New best score: 12.569
Metric val_loss improved by 0.021 >= min_delta = 0.0005. New best score: 12.548
Metric val_loss improved by 0.021 >= min_delta = 0.0005. New best score: 12.528
Metric val_loss improved by 0.017 >= min_delta = 0.0005. New best score: 12.511
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.497
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.483
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.469
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.460
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.450
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 2, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 0: 103.70892540748503

Fold 2/3


Metric val_loss improved by 12.115 >= min_delta = 0.0005. New best score: 0.292
Metric val_loss improved by 0.118 >= min_delta = 0.0005. New best score: 0.174
Metric val_loss improved by 0.038 >= min_delta = 0.0005. New best score: 0.136
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.129
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 0.121
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.116
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.111
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.109
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.107
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.105
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.103
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.101
Metric val_loss improved by 0.002 >= min_delta = 0.

Model loaded from checkpoint for trial 2, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 1: 33.8447352307288

Fold 3/3


Monitored metric val_loss did not improve in the last 15 records. Best score: 0.062. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 2, fold 2


[I 2025-06-15 12:32:02,160] Trial 2 finished with value: 62.29155632988628 and parameters: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 49.32100835144503
Média dos SMAPE nos folds: 62.29155632988628
Results of trial 2 saved in optuna_iteration_metrics/trial_2.json
Current value: 62.29155632988628, Current params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.379
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.364
Metric val_loss improved by 0.022 >= min_delta = 0.0005. New best score: 12.341
Metric val_loss improved by 0.036 >= min_delta = 0.0005. New best score: 12.305
Metric val_loss improved by 0.021 >= min_delta = 0.0005. New best score: 12.284
Monitored metric val_loss did not improve in the last 15 records. Best score: 12.284. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 3, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 119.8854143719623

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 12.164 >= min_delta = 0.0005. New best score: 0.120
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.117
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.115
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 0.101
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 0.082
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.078
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.072
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 0.061
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.060
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.056
Metric val_loss improved by 0.005 >= min_delta = 0.

Model loaded from checkpoint for trial 3, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 40.35744178309647

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.044. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 3, fold 2


[I 2025-06-15 12:42:56,469] Trial 3 finished with value: 73.64205320188096 and parameters: {'input_chunk_length': 72, 'dropout': 0.45000000000000007, 'batch_size': 112, 'lr': 0.00020177800900359323, 'weight_decay': 1.6320786847091616e-06, 'nhead': 2, 'd_model': 184, 'num_encoder_layers': 4, 'num_decoder_layers': 6, 'activation': 'ReGLU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 60.683303450584106
Média dos SMAPE nos folds: 73.64205320188096
Results of trial 3 saved in optuna_iteration_metrics/trial_3.json
Current value: 73.64205320188096, Current params: {'input_chunk_length': 72, 'dropout': 0.45000000000000007, 'batch_size': 112, 'lr': 0.00020177800900359323, 'weight_decay': 1.6320786847091616e-06, 'nhead': 2, 'd_model': 184, 'num_encoder_layers': 4, 'num_decoder_layers': 6, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 13.051
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 13.032
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 13.016
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 13.004
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.993
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.985
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 12.979
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.975
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.972
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.970
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.968
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.967
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 4, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 0: 142.19820701237103

Fold 2/3


Metric val_loss improved by 12.535 >= min_delta = 0.0005. New best score: 0.420
Metric val_loss improved by 0.118 >= min_delta = 0.0005. New best score: 0.303
Metric val_loss improved by 0.077 >= min_delta = 0.0005. New best score: 0.225
Metric val_loss improved by 0.058 >= min_delta = 0.0005. New best score: 0.168
Metric val_loss improved by 0.030 >= min_delta = 0.0005. New best score: 0.138
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 0.123
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.119
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.115
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.112
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.111
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.110
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.105
Metric val_loss improved by 0.004 >= min_delta = 0.

Model loaded from checkpoint for trial 4, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 1: 41.688360805198464

Fold 3/3


Monitored metric val_loss did not improve in the last 15 records. Best score: 0.075. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 4, fold 2


[I 2025-06-15 12:52:03,598] Trial 4 finished with value: 81.20098757015977 and parameters: {'input_chunk_length': 132, 'dropout': 0.15000000000000002, 'batch_size': 120, 'lr': 4.4336636829087386e-05, 'weight_decay': 2.3845010268325604e-05, 'nhead': 3, 'd_model': 120, 'num_encoder_layers': 3, 'num_decoder_layers': 1, 'activation': 'SwiGLU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 59.71639489290978
Média dos SMAPE nos folds: 81.20098757015977
Results of trial 4 saved in optuna_iteration_metrics/trial_4.json
Current value: 81.20098757015977, Current params: {'input_chunk_length': 132, 'dropout': 0.15000000000000002, 'batch_size': 120, 'lr': 4.4336636829087386e-05, 'weight_decay': 2.3845010268325604e-05, 'nhead': 3, 'd_model': 120, 'num_encoder_layers': 3, 'num_decoder_layers': 1, 'activation': 'SwiGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.683
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.678
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.673
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.668
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.663
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.658
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.653
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.649
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.644
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.640
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.635
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.631
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 5, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 0: 133.05952979699563

Fold 2/3


Metric val_loss improved by 11.923 >= min_delta = 0.0005. New best score: 0.639
Metric val_loss improved by 0.022 >= min_delta = 0.0005. New best score: 0.618
Metric val_loss improved by 0.022 >= min_delta = 0.0005. New best score: 0.596
Metric val_loss improved by 0.021 >= min_delta = 0.0005. New best score: 0.575
Metric val_loss improved by 0.020 >= min_delta = 0.0005. New best score: 0.555
Metric val_loss improved by 0.020 >= min_delta = 0.0005. New best score: 0.536
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 0.517
Metric val_loss improved by 0.018 >= min_delta = 0.0005. New best score: 0.499
Metric val_loss improved by 0.017 >= min_delta = 0.0005. New best score: 0.482
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 0.467
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 0.452
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 0.438
Metric val_loss improved by 0.013 >= min_delta = 0.

Model loaded from checkpoint for trial 5, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 75.85823933002821

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.314. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 5, fold 2


[I 2025-06-15 12:58:17,815] Trial 5 finished with value: 98.01531189501168 and parameters: {'input_chunk_length': 60, 'dropout': 0.5, 'batch_size': 232, 'lr': 2.1688871725782547e-06, 'weight_decay': 0.00035787103979123016, 'nhead': 4, 'd_model': 152, 'num_encoder_layers': 2, 'num_decoder_layers': 6, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 85.12816655801115
Média dos SMAPE nos folds: 98.01531189501168
Results of trial 5 saved in optuna_iteration_metrics/trial_5.json
Current value: 98.01531189501168, Current params: {'input_chunk_length': 60, 'dropout': 0.5, 'batch_size': 232, 'lr': 2.1688871725782547e-06, 'weight_decay': 0.00035787103979123016, 'nhead': 4, 'd_model': 152, 'num_encoder_layers': 2, 'num_decoder_layers': 6, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.291
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.288
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.287
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.286
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.283
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.279
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.277
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.274
Monitored metric val_loss did not improve in the last 15 records. Best score: 12.274. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 6, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 88.37399636359577

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.925 >= min_delta = 0.0005. New best score: 0.349
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.345
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.345. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 6, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 93.60913451703065

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 0.148 >= min_delta = 0.0005. New best score: 0.197
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 0.189
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.182
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.176
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.172
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.172. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 6, fold 2


[I 2025-06-15 13:09:28,861] Trial 6 finished with value: 79.71075824932291 and parameters: {'input_chunk_length': 60, 'dropout': 0.15000000000000002, 'batch_size': 216, 'lr': 0.00297963229520722, 'weight_decay': 1.3243932635819752e-06, 'nhead': 8, 'd_model': 176, 'num_encoder_layers': 5, 'num_decoder_layers': 6, 'activation': 'GLU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 57.14914386734232
Média dos SMAPE nos folds: 79.71075824932291
Results of trial 6 saved in optuna_iteration_metrics/trial_6.json
Current value: 79.71075824932291, Current params: {'input_chunk_length': 60, 'dropout': 0.15000000000000002, 'batch_size': 216, 'lr': 0.00297963229520722, 'weight_decay': 1.3243932635819752e-06, 'nhead': 8, 'd_model': 176, 'num_encoder_layers': 5, 'num_decoder_layers': 6, 'activation': 'GLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 13:09:41,705] Trial 7 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 13.03263963756587, Current params: {'input_chunk_length': 168, 'dropout': 0.30000000000000004, 'batch_size': 232, 'lr': 0.0012430664193757459, 'weight_decay': 0.00038728000225098834, 'nhead': 2, 'd_model': 160, 'num_encoder_layers': 6, 'num_decoder_layers': 1, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.507
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.502
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.501
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.500
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.499
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.496
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 12.487
Metric val_loss improved by 0.020 >= min_delta = 0.0005. New best score: 12.466
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.465
[I 2025-06-15 13:16:40,482] Trial 8 pruned. Trial was pruned at epoch 28.
[I 2025-06-15 13:16:40,484] Trial 9 pruned. d_model (208) must be divisible by nhead (6)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU avai

Current value: 12.454700104085603, Current params: {'input_chunk_length': 96, 'dropout': 0.25, 'batch_size': 184, 'lr': 0.001354309787235038, 'weight_decay': 3.1511356735102033e-05, 'nhead': 4, 'd_model': 240, 'num_encoder_layers': 6, 'num_decoder_layers': 6, 'activation': 'ReLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 84, 'dropout': 0.30000000000000004, 'batch_size': 128, 'lr': 0.0010066995165653055, 'weight_decay': 2.72808389801792e-06, 'nhead': 6, 'd_model': 208}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_

Metric val_loss improved. New best score: 12.387
Metric val_loss improved by 0.046 >= min_delta = 0.0005. New best score: 12.341
Metric val_loss improved by 0.029 >= min_delta = 0.0005. New best score: 12.312
Metric val_loss improved by 0.023 >= min_delta = 0.0005. New best score: 12.289
Metric val_loss improved by 0.018 >= min_delta = 0.0005. New best score: 12.271
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.256
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.241
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.228
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.218
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 12.206
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.196
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 12.187
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 10, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 149.11092406450936

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.741 >= min_delta = 0.0005. New best score: 0.338
Metric val_loss improved by 0.130 >= min_delta = 0.0005. New best score: 0.208
Metric val_loss improved by 0.052 >= min_delta = 0.0005. New best score: 0.156
Metric val_loss improved by 0.017 >= min_delta = 0.0005. New best score: 0.139
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.137
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.134
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.131
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.129
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.127
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.124
Metric val_loss improved by 0.005 >= min_delta = 0.

Model loaded from checkpoint for trial 10, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 60.036130991149285

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.108. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 10, fold 2


[I 2025-06-15 13:23:36,202] Trial 10 finished with value: 85.53755946848958 and parameters: {'input_chunk_length': 24, 'dropout': 0.4, 'batch_size': 64, 'lr': 2.3593446003983578e-05, 'weight_decay': 0.005407804448909039, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 4, 'activation': 'SwiGLU'}. Best is trial 2 with value: 62.29155632988628.
[I 2025-06-15 13:23:36,218] Trial 11 pruned. d_model (256) must be divisible by nhead (6)
[I 2025-06-15 13:23:36,235] Trial 12 pruned. d_model (200) must be divisible by nhead (6)
[I 2025-06-15 13:23:36,252] Trial 13 pruned. d_model (120) must be divisible by nhead (7)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True


SMAPE fold 2: 47.46562334981008
Média dos SMAPE nos folds: 85.53755946848958
Results of trial 10 saved in optuna_iteration_metrics/trial_10.json
Current value: 85.53755946848958, Current params: {'input_chunk_length': 24, 'dropout': 0.4, 'batch_size': 64, 'lr': 2.3593446003983578e-05, 'weight_decay': 0.005407804448909039, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 4, 'activation': 'SwiGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 24, 'dropout': 0.5, 'batch_size': 136, 'lr': 0.00017977041629619703, 'weight_decay': 4.6357541439442706e-06, 'nhead': 6, 'd_model': 256}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 

TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2025-06-15 13:23:50,993] Trial 14 pruned. Trial was pruned at epoch 0.
[I 2025-06-15 13:23:51,013] Trial 15 pruned. d_model (32) must be divisible by nhead (5)
[I 2025-06-15 13:23:51,031] Trial 16 pruned. d_model (192) must be divisible by nhead (7)
[I 2025-06-15 13:23:51,049] Trial 17 pruned. d_model (112) must be divisible by nhead (5)
[I 2025-06-15 13:23:51,067] Trial 18 pruned. d_model (232) must be divisible by nhead (7)
[I 2025-06-15 13:23:51,085] Trial 19 pruned. d_model (88) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


Current value: 13.084213164705291, Current params: {'input_chunk_length': 144, 'dropout': 0.35, 'batch_size': 160, 'lr': 1.6872363147836916e-05, 'weight_decay': 7.658005504444201e-06, 'nhead': 2, 'd_model': 208, 'num_encoder_layers': 4, 'num_decoder_layers': 4, 'activation': 'GEGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 84, 'dropout': 0.45000000000000007, 'batch_size': 96, 'lr': 6.785037263077283e-05, 'weight_decay': 0.009723440642764402, 'nhead': 5, 'd_model': 32}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved. New best score: 12.403
Metric val_loss improved by 0.047 >= min_delta = 0.0005. New best score: 12.356
Metric val_loss improved by 0.031 >= min_delta = 0.0005. New best score: 12.325
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.323
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.316
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 12.297
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.285
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.277
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 12.266
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.263
Monitored metric val_loss did not improve in the last 15 records. Best sc

Model loaded from checkpoint for trial 20, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 102.9863769982851

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 12.132 >= min_delta = 0.0005. New best score: 0.132
Metric val_loss improved by 0.017 >= min_delta = 0.0005. New best score: 0.115
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 0.103
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.097
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 0.088
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.082
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.073
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.072
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.063
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.059
Metric val_loss improved by 0.004 >= min_delta = 0.

Model loaded from checkpoint for trial 20, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 46.03461286692292

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.052. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 20, fold 2


[I 2025-06-15 13:33:51,539] Trial 20 finished with value: 68.49301422569486 and parameters: {'input_chunk_length': 36, 'dropout': 0.1, 'batch_size': 184, 'lr': 0.0001561527698134007, 'weight_decay': 3.462507726161673e-06, 'nhead': 8, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 3, 'activation': 'Bilinear'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 56.458052811876584
Média dos SMAPE nos folds: 68.49301422569486
Results of trial 20 saved in optuna_iteration_metrics/trial_20.json
Current value: 68.49301422569486, Current params: {'input_chunk_length': 36, 'dropout': 0.1, 'batch_size': 184, 'lr': 0.0001561527698134007, 'weight_decay': 3.462507726161673e-06, 'nhead': 8, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 3, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.411
Metric val_loss improved by 0.032 >= min_delta = 0.0005. New best score: 12.379
Metric val_loss improved by 0.024 >= min_delta = 0.0005. New best score: 12.354
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 12.342
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.337
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.322
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.310
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.303
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 12.292
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.289
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.282
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 12.276
[I 2025-06-15 13:36:51,497] Trial 21 pruned. Trial was pruned at epoch 

Current value: 12.295938162727175, Current params: {'input_chunk_length': 36, 'dropout': 0.1, 'batch_size': 192, 'lr': 0.00012381988988365887, 'weight_decay': 2.8493152483364028e-06, 'nhead': 8, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 3, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 72, 'dropout': 0.25, 'batch_size': 200, 'lr': 3.1187725334010476e-05, 'weight_decay': 1.1695283476185155e-06, 'nhead': 7, 'd_model': 256}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder

Metric val_loss improved. New best score: 12.452
Metric val_loss improved by 0.024 >= min_delta = 0.0005. New best score: 12.428
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.426
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 12.415
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.403
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 12.388
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.383
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.367
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.353
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 12.340
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.330
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.318
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score

Current value: 12.24850724668669, Current params: {'input_chunk_length': 36, 'dropout': 0.1, 'batch_size': 152, 'lr': 0.00018296304707744667, 'weight_decay': 5.101969903695592e-06, 'nhead': 8, 'd_model': 176, 'num_encoder_layers': 3, 'num_decoder_layers': 3, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 60, 'dropout': 0.4, 'batch_size': 168, 'lr': 7.48122402965252e-05, 'weight_decay': 1.1915389281847134e-05, 'nhead': 6, 'd_model': 224}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_laye

Metric val_loss improved. New best score: 12.419
Metric val_loss improved by 0.037 >= min_delta = 0.0005. New best score: 12.382
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.377
Metric val_loss improved by 0.030 >= min_delta = 0.0005. New best score: 12.347
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 12.328
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.318
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 12.306
[I 2025-06-15 13:43:37,257] Trial 25 pruned. Trial was pruned at epoch 28.
[I 2025-06-15 13:43:37,279] Trial 26 pruned. d_model (208) must be divisible by nhead (7)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True


Current value: 12.345558260693982, Current params: {'input_chunk_length': 84, 'dropout': 0.25, 'batch_size': 112, 'lr': 0.0005598428125866946, 'weight_decay': 2.1295749902850952e-06, 'nhead': 8, 'd_model': 184, 'num_encoder_layers': 4, 'num_decoder_layers': 4, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 108, 'dropout': 0.45000000000000007, 'batch_size': 104, 'lr': 1.5161069007346749e-05, 'weight_decay': 3.947581564857723e-06, 'nhead': 7, 'd_model': 208}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 

TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved. New best score: 12.187
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 12.175
Metric val_loss improved by 0.022 >= min_delta = 0.0005. New best score: 12.153
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.146
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.139
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 12.130
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.129
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.124
`Trainer.fit` stopped: `max_epochs=30` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 27, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 87.8826591211856

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.763 >= min_delta = 0.0005. New best score: 0.362
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.362. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 27, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 85.56253145928301

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 0.171 >= min_delta = 0.0005. New best score: 0.191
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.187
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.185
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.184
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.178
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.175
`Trainer.fit` stopped: `max_epochs=30` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 27, fold 2


[I 2025-06-15 13:58:59,383] Trial 27 finished with value: 77.00277650036425 and parameters: {'input_chunk_length': 36, 'dropout': 0.35, 'batch_size': 208, 'lr': 0.004869125000082519, 'weight_decay': 1.775285567732089e-05, 'nhead': 2, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}. Best is trial 2 with value: 62.29155632988628.
[I 2025-06-15 13:58:59,403] Trial 28 pruned. d_model (256) must be divisible by nhead (5)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 57.563138920624134
Média dos SMAPE nos folds: 77.00277650036425
Results of trial 27 saved in optuna_iteration_metrics/trial_27.json
Current value: 77.00277650036425, Current params: {'input_chunk_length': 36, 'dropout': 0.35, 'batch_size': 208, 'lr': 0.004869125000082519, 'weight_decay': 1.775285567732089e-05, 'nhead': 2, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 60, 'dropout': 0.2, 'batch_size': 248, 'lr': 7.651096588343934e-05, 'weight_decay': 1.73282696494893e-06, 'nhead': 5, 'd_model': 256}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size':

[I 2025-06-15 13:59:06,210] Trial 29 pruned. Trial was pruned at epoch 0.


Current value: 12.715969467744694, Current params: {'input_chunk_length': 96, 'dropout': 0.4, 'batch_size': 176, 'lr': 0.0002949155383394498, 'weight_decay': 6.542296699676285e-05, 'nhead': 4, 'd_model': 136, 'num_encoder_layers': 1, 'num_decoder_layers': 2, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2025-06-15 14:00:14,993] Trial 30 pruned. Trial was pruned at epoch 0.
[I 2025-06-15 14:00:15,017] Trial 31 pruned. d_model (88) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.94143281493364, Current params: {'input_chunk_length': 108, 'dropout': 0.45000000000000007, 'batch_size': 80, 'lr': 1.7735691062472563e-06, 'weight_decay': 0.00013193303790571878, 'nhead': 8, 'd_model': 176, 'num_encoder_layers': 5, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 120, 'dropout': 0.4, 'batch_size': 176, 'lr': 0.0005695983993364699, 'weight_decay': 0.0002689293820990036, 'nhead': 3, 'd_model': 88}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'n

[I 2025-06-15 14:00:21,369] Trial 32 pruned. Trial was pruned at epoch 0.


Current value: 12.98884177797706, Current params: {'input_chunk_length': 108, 'dropout': 0.15000000000000002, 'batch_size': 128, 'lr': 0.00023868005638104774, 'weight_decay': 0.0010212960813571275, 'nhead': 4, 'd_model': 32, 'num_encoder_layers': 2, 'num_decoder_layers': 2, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2025-06-15 14:01:23,649] Trial 33 pruned. Trial was pruned at epoch 0.
[I 2025-06-15 14:01:23,672] Trial 34 pruned. d_model (56) must be divisible by nhead (3)
[I 2025-06-15 14:01:23,693] Trial 35 pruned. d_model (160) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 13.027528657269745, Current params: {'input_chunk_length': 132, 'dropout': 0.2, 'batch_size': 144, 'lr': 4.643347640771909e-06, 'weight_decay': 4.768072651713846e-05, 'nhead': 2, 'd_model': 72, 'num_encoder_layers': 2, 'num_decoder_layers': 2, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 72, 'dropout': 0.35, 'batch_size': 224, 'lr': 0.00013086636678477027, 'weight_decay': 6.494953136534378e-06, 'nhead': 3, 'd_model': 56}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers

[I 2025-06-15 14:01:31,466] Trial 36 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 13.00015407171493, Current params: {'input_chunk_length': 168, 'dropout': 0.30000000000000004, 'batch_size': 80, 'lr': 0.0021343565589179866, 'weight_decay': 2.46434227395867e-05, 'nhead': 4, 'd_model': 144, 'num_encoder_layers': 1, 'num_decoder_layers': 2, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.309
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 12.290
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.288
Metric val_loss improved by 0.032 >= min_delta = 0.0005. New best score: 12.256
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.246
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.244
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.241
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.235
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.221
Metric val_loss improved by 0.026 >= min_delta = 0.0005. New best score: 12.196
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.194
Metric val_loss improved by 0.036 >= min_delta = 0.0005. New best score: 12.158
[I 2025-06-15 14:05:05,961] Trial 37 pruned. Trial was pruned at epoch 

Current value: 12.157482692369115, Current params: {'input_chunk_length': 60, 'dropout': 0.4, 'batch_size': 208, 'lr': 0.0008377269252298344, 'weight_decay': 3.089120957696183e-06, 'nhead': 6, 'd_model': 240, 'num_encoder_layers': 3, 'num_decoder_layers': 3, 'activation': 'ReLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 14:05:17,161] Trial 38 pruned. Trial was pruned at epoch 0.
[I 2025-06-15 14:05:17,185] Trial 39 pruned. d_model (184) must be divisible by nhead (5)
[I 2025-06-15 14:05:17,206] Trial 40 pruned. d_model (240) must be divisible by nhead (7)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


Current value: 13.088383464979644, Current params: {'input_chunk_length': 156, 'dropout': 0.45000000000000007, 'batch_size': 184, 'lr': 6.77139931000401e-05, 'weight_decay': 0.000890043798233251, 'nhead': 4, 'd_model': 160, 'num_encoder_layers': 2, 'num_decoder_layers': 5, 'activation': 'GLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 120, 'dropout': 0.30000000000000004, 'batch_size': 112, 'lr': 0.00031627109866000656, 'weight_decay': 0.0026221640811264846, 'nhead': 5, 'd_model': 184}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_l

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved. New best score: 12.169
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 12.160
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 12.142
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.137
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.136
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.132
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.129
`Trainer.fit` stopped: `max_epochs=30` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 41, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 88.15662900574746

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.755 >= min_delta = 0.0005. New best score: 0.373
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.364
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 0.350
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.350. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 41, fold 1
SMAPE fold 1: 106.14384054323605

Fold 3/3


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 0.156 >= min_delta = 0.0005. New best score: 0.194
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.193
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.189
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.187
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.184
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.177
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.175
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.170
`Trainer.fit` stopped: `max_epochs=30` reached.
GPU available: True (c

Model loaded from checkpoint for trial 41, fold 2


[I 2025-06-15 14:24:52,075] Trial 41 finished with value: 83.3822150043009 and parameters: {'input_chunk_length': 36, 'dropout': 0.35, 'batch_size': 216, 'lr': 0.003097070458928314, 'weight_decay': 9.706823121673909e-06, 'nhead': 2, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 2: 55.84617546391917
Média dos SMAPE nos folds: 83.3822150043009
Results of trial 41 saved in optuna_iteration_metrics/trial_41.json
Current value: 83.3822150043009, Current params: {'input_chunk_length': 36, 'dropout': 0.35, 'batch_size': 216, 'lr': 0.003097070458928314, 'weight_decay': 9.706823121673909e-06, 'nhead': 2, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved. New best score: 12.233
Metric val_loss improved by 0.020 >= min_delta = 0.0005. New best score: 12.213
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.212
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 12.196
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 12.190
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.187
[I 2025-06-15 14:28:55,858] Trial 42 pruned. Trial was pruned at epoch 28.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.27853114634107, Current params: {'input_chunk_length': 48, 'dropout': 0.35, 'batch_size': 208, 'lr': 0.0017474456290397517, 'weight_decay': 2.0116220935088707e-05, 'nhead': 2, 'd_model': 216, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.344
Metric val_loss improved by 0.119 >= min_delta = 0.0005. New best score: 12.226
Metric val_loss improved by 0.027 >= min_delta = 0.0005. New best score: 12.199
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.197
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.196
Monitored metric val_loss did not improve in the last 15 records. Best score: 12.196. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 43, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 91.60264516819726

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.994 >= min_delta = 0.0005. New best score: 0.202
Metric val_loss improved by 0.082 >= min_delta = 0.0005. New best score: 0.119
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.118
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.113
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.112
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 0.098
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 0.082
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.077
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.076
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.074
Metric val_loss improved by 0.007 >= min_delta = 0.

Model loaded from checkpoint for trial 43, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 42.72579986876542

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.049. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 43, fold 2


[I 2025-06-15 14:40:44,035] Trial 43 finished with value: 64.63506816168899 and parameters: {'input_chunk_length': 36, 'dropout': 0.4, 'batch_size': 192, 'lr': 0.00017827282633805212, 'weight_decay': 1.7113885114332158e-06, 'nhead': 2, 'd_model': 200, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}. Best is trial 2 with value: 62.29155632988628.
[I 2025-06-15 14:40:44,059] Trial 44 pruned. d_model (200) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 59.57675944810429
Média dos SMAPE nos folds: 64.63506816168899
Results of trial 43 saved in optuna_iteration_metrics/trial_43.json
Current value: 64.63506816168899, Current params: {'input_chunk_length': 36, 'dropout': 0.4, 'batch_size': 192, 'lr': 0.00017827282633805212, 'weight_decay': 1.7113885114332158e-06, 'nhead': 2, 'd_model': 200, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 24, 'dropout': 0.4, 'batch_size': 176, 'lr': 0.00017417285342679898, 'weight_decay': 1.8656630779431532e-06, 'nhead': 3, 'd_model': 200}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_si

[I 2025-06-15 14:40:56,369] Trial 45 pruned. Trial was pruned at epoch 0.
[I 2025-06-15 14:40:56,393] Trial 46 pruned. d_model (232) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.8366160969205, Current params: {'input_chunk_length': 96, 'dropout': 0.45000000000000007, 'batch_size': 192, 'lr': 0.00038841571931777806, 'weight_decay': 4.1195494134822985e-06, 'nhead': 2, 'd_model': 192, 'num_encoder_layers': 4, 'num_decoder_layers': 3, 'activation': 'GEGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 60, 'dropout': 0.4, 'batch_size': 168, 'lr': 0.00011210304664926231, 'weight_decay': 1.5278319970826689e-06, 'nhead': 3, 'd_model': 232}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'n

[I 2025-06-15 14:41:06,057] Trial 47 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.5151362281107, Current params: {'input_chunk_length': 48, 'dropout': 0.2, 'batch_size': 136, 'lr': 2.6887193457745478e-05, 'weight_decay': 1.4876781552025796e-06, 'nhead': 2, 'd_model': 248, 'num_encoder_layers': 3, 'num_decoder_layers': 4, 'activation': 'SwiGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 14:41:14,641] Trial 48 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.684489077332483, Current params: {'input_chunk_length': 84, 'dropout': 0.45000000000000007, 'batch_size': 232, 'lr': 0.0002106219302072863, 'weight_decay': 3.0592308921035202e-06, 'nhead': 8, 'd_model': 168, 'num_encoder_layers': 1, 'num_decoder_layers': 6, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 14:41:30,657] Trial 49 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 13.133215742206755, Current params: {'input_chunk_length': 132, 'dropout': 0.5, 'batch_size': 96, 'lr': 9.871695630612456e-05, 'weight_decay': 7.509091129647602e-06, 'nhead': 6, 'd_model': 192, 'num_encoder_layers': 5, 'num_decoder_layers': 2, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 14:41:39,585] Trial 50 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.626738338195201, Current params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 152, 'lr': 4.888403734771858e-05, 'weight_decay': 2.0546258113525914e-06, 'nhead': 2, 'd_model': 104, 'num_encoder_layers': 4, 'num_decoder_layers': 3, 'activation': 'ReGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.181
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 12.168
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.152
Metric val_loss improved by 0.021 >= min_delta = 0.0005. New best score: 12.131
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.130
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.118
Monitored metric val_loss did not improve in the last 15 records. Best score: 12.118. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 51, fold 0


Exception ignored in: <function _ConnectionBase.__del__ at 0x7a1c4f8b9b20>
Traceback (most recent call last):
  File "/usr/lib/python3.12/multiprocessing/connection.py", line 133, in __del__
    self._close()
  File "/usr/lib/python3.12/multiprocessing/connection.py", line 377, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 88.12650962368649

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.826 >= min_delta = 0.0005. New best score: 0.293
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.293. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 51, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 93.56360576960427

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 0.114 >= min_delta = 0.0005. New best score: 0.179
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.179. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 51, fold 2


[I 2025-06-15 14:56:31,307] Trial 51 finished with value: 78.0945946744279 and parameters: {'input_chunk_length': 36, 'dropout': 0.35, 'batch_size': 200, 'lr': 0.004806145477452508, 'weight_decay': 2.1226141068270085e-05, 'nhead': 2, 'd_model': 224, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}. Best is trial 2 with value: 62.29155632988628.
[I 2025-06-15 14:56:31,329] Trial 52 pruned. d_model (208) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True


SMAPE fold 2: 52.593668629992955
Média dos SMAPE nos folds: 78.0945946744279
Results of trial 51 saved in optuna_iteration_metrics/trial_51.json
Current value: 78.0945946744279, Current params: {'input_chunk_length': 36, 'dropout': 0.35, 'batch_size': 200, 'lr': 0.004806145477452508, 'weight_decay': 2.1226141068270085e-05, 'nhead': 2, 'd_model': 224, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 36, 'dropout': 0.30000000000000004, 'batch_size': 184, 'lr': 1.4034275308675776e-05, 'weight_decay': 4.839478525903554e-06, 'nhead': 3, 'd_model': 208}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0

TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved. New best score: 12.207
Metric val_loss improved by 0.102 >= min_delta = 0.0005. New best score: 12.106
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.098
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.093
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.086
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 12.074
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 12.061
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.059
Monitored metric val_loss did not improve in the last 15 records. Best score: 12.059. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 53, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 108.82329279526323

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.840 >= min_delta = 0.0005. New best score: 0.219
Metric val_loss improved by 0.084 >= min_delta = 0.0005. New best score: 0.135
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 0.123
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.115
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.110
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.110
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 0.098
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 0.083
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 0.073
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.072
Metric val_loss improved by 0.004 >= min_delta = 0.

Model loaded from checkpoint for trial 53, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 46.39374218745687

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.051. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 53, fold 2


[I 2025-06-15 15:07:37,991] Trial 53 finished with value: 70.74185405544479 and parameters: {'input_chunk_length': 24, 'dropout': 0.35, 'batch_size': 216, 'lr': 0.0001610045748976936, 'weight_decay': 1.3918165552016579e-05, 'nhead': 2, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}. Best is trial 2 with value: 62.29155632988628.
[I 2025-06-15 15:07:38,013] Trial 54 pruned. d_model (232) must be divisible by nhead (3)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 57.008527183614305
Média dos SMAPE nos folds: 70.74185405544479
Results of trial 53 saved in optuna_iteration_metrics/trial_53.json
Current value: 70.74185405544479, Current params: {'input_chunk_length': 24, 'dropout': 0.35, 'batch_size': 216, 'lr': 0.0001610045748976936, 'weight_decay': 1.3918165552016579e-05, 'nhead': 2, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 24, 'dropout': 0.4, 'batch_size': 232, 'lr': 0.0001658328445521009, 'weight_decay': 3.678820432308386e-05, 'nhead': 3, 'd_model': 232}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_siz

[I 2025-06-15 15:07:47,977] Trial 55 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.497592612897186, Current params: {'input_chunk_length': 24, 'dropout': 0.35, 'batch_size': 168, 'lr': 1.008532108614347e-06, 'weight_decay': 1.3248194204673623e-05, 'nhead': 2, 'd_model': 200, 'num_encoder_layers': 6, 'num_decoder_layers': 1, 'activation': 'SwiGLU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 15:07:57,481] Trial 56 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.537825380334064, Current params: {'input_chunk_length': 48, 'dropout': 0.45000000000000007, 'batch_size': 200, 'lr': 0.0002477118739381666, 'weight_decay': 1.0010413070569863e-06, 'nhead': 8, 'd_model': 128, 'num_encoder_layers': 3, 'num_decoder_layers': 4, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.333
Metric val_loss improved by 0.097 >= min_delta = 0.0005. New best score: 12.235
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.221
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.219
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.217
Metric val_loss improved by 0.018 >= min_delta = 0.0005. New best score: 12.199
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.195
[I 2025-06-15 15:12:00,097] Trial 57 pruned. Trial was pruned at epoch 20.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.32954789403234, Current params: {'input_chunk_length': 48, 'dropout': 0.4, 'batch_size': 224, 'lr': 0.00040488779676882544, 'weight_decay': 3.024851125271891e-06, 'nhead': 4, 'd_model': 248, 'num_encoder_layers': 5, 'num_decoder_layers': 6, 'activation': 'Bilinear'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.118
Metric val_loss improved by 0.038 >= min_delta = 0.0005. New best score: 12.081
Metric val_loss improved by 0.023 >= min_delta = 0.0005. New best score: 12.057
Metric val_loss improved by 0.030 >= min_delta = 0.0005. New best score: 12.027
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.026
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 12.013
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 12.007
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.003
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.000
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 11.999
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 11.990
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 11.985
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 58, fold 0


Exception in thread Exception ignored in: <function _ConnectionBase.__del__ at 0x7a1c4f8b9b20>
Traceback (most recent call last):
  File "/usr/lib/python3.12/multiprocessing/connection.py", line 133, in __del__
QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.12/multiprocessing/queues.py", line 259, in _feed
    self._close()
  File "/usr/lib/python3.12/multiprocessing/connection.py", line 377, in _close
    reader_close()
  File "/usr/lib/python3.12/multiprocessing/connection.py", line 178, in close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
    self._close()
  File "/usr/lib/python3.12/multiprocessing/connection.py", line 377, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/home/eduardo/Water-Cycle-Neural-Net

SMAPE fold 0: 90.78440533799565

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.833 >= min_delta = 0.0005. New best score: 0.126
Metric val_loss improved by 0.018 >= min_delta = 0.0005. New best score: 0.109
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.105
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.103
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 0.092
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.086
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.080
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.075
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.071
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.069
Metric val_loss improved by 0.010 >= min_delta = 0.

Model loaded from checkpoint for trial 58, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 50.1865819917503

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.056. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 58, fold 2


[I 2025-06-15 15:26:02,894] Trial 58 finished with value: 63.67341604492689 and parameters: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 128, 'lr': 9.343988457157128e-05, 'weight_decay': 5.6377610166776286e-06, 'nhead': 7, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 50.04926080503471
Média dos SMAPE nos folds: 63.67341604492689
Results of trial 58 saved in optuna_iteration_metrics/trial_58.json
Current value: 63.67341604492689, Current params: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 128, 'lr': 9.343988457157128e-05, 'weight_decay': 5.6377610166776286e-06, 'nhead': 7, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.159
Metric val_loss improved by 0.045 >= min_delta = 0.0005. New best score: 12.114
Metric val_loss improved by 0.025 >= min_delta = 0.0005. New best score: 12.089
Metric val_loss improved by 0.020 >= min_delta = 0.0005. New best score: 12.069
Metric val_loss improved by 0.020 >= min_delta = 0.0005. New best score: 12.049
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.037
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.026
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.024
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.022
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.014
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.013
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.011
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 59, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 105.40734395535253

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.821 >= min_delta = 0.0005. New best score: 0.177
Metric val_loss improved by 0.069 >= min_delta = 0.0005. New best score: 0.108
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.107
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.105
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.101
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.095
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.092
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.087
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.082
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.076
Metric val_loss improved by 0.004 >= min_delta = 0.

Model loaded from checkpoint for trial 59, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 49.2041197682071

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.060. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 59, fold 2


[I 2025-06-15 15:36:05,523] Trial 59 finished with value: 68.48161878293119 and parameters: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 128, 'lr': 5.475178750781707e-05, 'weight_decay': 5.762107452017487e-06, 'nhead': 7, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
[I 2025-06-15 15:36:05,545] Trial 60 pruned. d_model (248) must be divisible by nhead (7)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 50.83339262523394
Média dos SMAPE nos folds: 68.48161878293119
Results of trial 59 saved in optuna_iteration_metrics/trial_59.json
Current value: 68.48161878293119, Current params: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 128, 'lr': 5.475178750781707e-05, 'weight_decay': 5.762107452017487e-06, 'nhead': 7, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 120, 'lr': 3.162924296125851e-05, 'weight_decay': 5.86534892531343e-06, 'nhead': 7, 'd_model': 248}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'd

Metric val_loss improved. New best score: 12.230
Metric val_loss improved by 0.042 >= min_delta = 0.0005. New best score: 12.188
Metric val_loss improved by 0.027 >= min_delta = 0.0005. New best score: 12.160
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 12.144
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.137
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 12.132
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.127
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.123
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 12.114
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.112
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.111
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.107
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score

Current value: 12.079698149314773, Current params: {'input_chunk_length': 36, 'dropout': 0.1, 'batch_size': 112, 'lr': 5.254315288776555e-05, 'weight_decay': 3.658862820914498e-06, 'nhead': 7, 'd_model': 224, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.147
Metric val_loss improved by 0.046 >= min_delta = 0.0005. New best score: 12.101
Metric val_loss improved by 0.027 >= min_delta = 0.0005. New best score: 12.074
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.071
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.070
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.069
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.066
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.065
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.064
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.059
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.052
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.047
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 62, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 93.3448385287866

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.878 >= min_delta = 0.0005. New best score: 0.120
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.117
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.108
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 0.098
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 0.088
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.080
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.075
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.071
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.062
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.062
Metric val_loss improved by 0.001 >= min_delta = 0.

Model loaded from checkpoint for trial 62, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 45.012393305496346

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 16 records. Best score: 0.054. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 62, fold 2


[I 2025-06-15 15:50:02,284] Trial 62 finished with value: 64.1140227283936 and parameters: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 104, 'lr': 9.244863357766163e-05, 'weight_decay': 9.05179596257353e-06, 'nhead': 8, 'd_model': 216, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 53.9848363508979
Média dos SMAPE nos folds: 64.1140227283936
Results of trial 62 saved in optuna_iteration_metrics/trial_62.json
Current value: 64.1140227283936, Current params: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 104, 'lr': 9.244863357766163e-05, 'weight_decay': 9.05179596257353e-06, 'nhead': 8, 'd_model': 216, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.150
Metric val_loss improved by 0.049 >= min_delta = 0.0005. New best score: 12.101
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.087
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.072
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.070
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.067
Metric val_loss improved by 0.017 >= min_delta = 0.0005. New best score: 12.050
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.045
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 12.039
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.029
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.028
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.027
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 63, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 84.5412824020712

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.879 >= min_delta = 0.0005. New best score: 0.118
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.116
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.113
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.106
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.101
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 0.093
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.088
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.078
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.074
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.071
Metric val_loss improved by 0.003 >= min_delta = 0.

Model loaded from checkpoint for trial 63, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 49.88630893797548

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.057. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 63, fold 2


[I 2025-06-15 16:01:53,674] Trial 63 finished with value: 62.47045017338937 and parameters: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 128, 'lr': 9.010608645507955e-05, 'weight_decay': 1.0363081595907054e-05, 'nhead': 8, 'd_model': 232, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 52.98375918012141
Média dos SMAPE nos folds: 62.47045017338937
Results of trial 63 saved in optuna_iteration_metrics/trial_63.json
Current value: 62.47045017338937, Current params: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 128, 'lr': 9.010608645507955e-05, 'weight_decay': 1.0363081595907054e-05, 'nhead': 8, 'd_model': 232, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 16:02:05,401] Trial 64 pruned. Trial was pruned at epoch 0.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.383472660044752, Current params: {'input_chunk_length': 36, 'dropout': 0.1, 'batch_size': 128, 'lr': 2.0878915084380892e-05, 'weight_decay': 2.341047464283192e-06, 'nhead': 8, 'd_model': 232, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.122
Metric val_loss improved by 0.029 >= min_delta = 0.0005. New best score: 12.093
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.090
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.088
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.082
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.080
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.078
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.071
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.068
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.058
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.055
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.054
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score

Current value: 12.01255144069077, Current params: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 104, 'lr': 8.741795472281111e-05, 'weight_decay': 7.688426250955922e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 4, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7a1bfe7e3b00>
Traceback (most recent call last):
  File "/home/eduardo/Water-Cycle-Neural-Network/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/home/eduardo/Water-Cycle-Neural-Network/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
Exception ignored in:  <function _MultiProcessingDataLoaderIter.__del__ at 0x7a1bfe7e3b00> 
 Traceback (most recent call last):
   File "/home/eduardo/Water-Cycle-Neural-Network/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1618, in __del__
       self._shutdown_workers()^
^  File "/home/eduardo/Water-Cycle-Neural-Network/venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
^    ^if w.is_alive():^
^^ ^ ^ ^ ^ ^ 
   File "/usr/lib/python3.12/multiprocessing/process

Current value: 12.083758393668926, Current params: {'input_chunk_length': 36, 'dropout': 0.1, 'batch_size': 136, 'lr': 3.462910752867313e-05, 'weight_decay': 9.211579277010253e-06, 'nhead': 8, 'd_model': 208, 'num_encoder_layers': 4, 'num_decoder_layers': 4, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.173
Metric val_loss improved by 0.064 >= min_delta = 0.0005. New best score: 12.109
Metric val_loss improved by 0.023 >= min_delta = 0.0005. New best score: 12.086
Metric val_loss improved by 0.020 >= min_delta = 0.0005. New best score: 12.066
Metric val_loss improved by 0.018 >= min_delta = 0.0005. New best score: 12.048
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.040
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.036
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.031
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.029
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.026
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.022
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.018
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score

Model loaded from checkpoint for trial 67, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 116.68056845873684

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.811 >= min_delta = 0.0005. New best score: 0.172
Metric val_loss improved by 0.056 >= min_delta = 0.0005. New best score: 0.117
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.115
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.110
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.103
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.099
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 0.086
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.083
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.076
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 0.073
Metric val_loss improved by 0.002 >= min_delta = 0.

Model loaded from checkpoint for trial 67, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 49.16803206884087

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.058. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 67, fold 2


[I 2025-06-15 16:21:18,219] Trial 67 finished with value: 71.77155977162117 and parameters: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 120, 'lr': 5.43244412938551e-05, 'weight_decay': 5.154446673881612e-06, 'nhead': 7, 'd_model': 224, 'num_encoder_layers': 3, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 49.46607878728578
Média dos SMAPE nos folds: 71.77155977162117
Results of trial 67 saved in optuna_iteration_metrics/trial_67.json
Current value: 71.77155977162117, Current params: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 120, 'lr': 5.43244412938551e-05, 'weight_decay': 5.154446673881612e-06, 'nhead': 7, 'd_model': 224, 'num_encoder_layers': 3, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 16:21:31,385] Trial 68 pruned. Trial was pruned at epoch 0.
[I 2025-06-15 16:21:31,412] Trial 69 pruned. d_model (200) must be divisible by nhead (7)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.324535501788777, Current params: {'input_chunk_length': 48, 'dropout': 0.1, 'batch_size': 104, 'lr': 6.548699598478538e-05, 'weight_decay': 3.824232299228709e-06, 'nhead': 8, 'd_model': 248, 'num_encoder_layers': 4, 'num_decoder_layers': 6, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 88, 'lr': 0.0001291605627634291, 'weight_decay': 9.759524167308715e-06, 'nhead': 7, 'd_model': 200}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 

Metric val_loss improved. New best score: 12.180
Metric val_loss improved by 0.031 >= min_delta = 0.0005. New best score: 12.149
Metric val_loss improved by 0.027 >= min_delta = 0.0005. New best score: 12.122
Metric val_loss improved by 0.022 >= min_delta = 0.0005. New best score: 12.100
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.097
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.094
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 12.087
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.075
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.071
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.065
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 12.060
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.058
[I 2025-06-15 16:25:59,842] Trial 70 pruned. Trial was pruned at epoch 

Current value: 12.06540373544669, Current params: {'input_chunk_length': 36, 'dropout': 0.15000000000000002, 'batch_size': 144, 'lr': 8.961621181849766e-05, 'weight_decay': 1.4974504533079818e-06, 'nhead': 8, 'd_model': 256, 'num_encoder_layers': 4, 'num_decoder_layers': 4, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved. New best score: 12.079
Metric val_loss improved by 0.021 >= min_delta = 0.0005. New best score: 12.058
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 12.044
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.040
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.040
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.038
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.034
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 12.019
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 12.017
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.014
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.006
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.004
Metric val_loss improved by 0

Model loaded from checkpoint for trial 71, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 100.43094937565512

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.823 >= min_delta = 0.0005. New best score: 0.126
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.117
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 0.105
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.099
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 0.084
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 0.076
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 0.063
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.061
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.060
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.055
Metric val_loss improved by 0.002 >= min_delta = 0.

Model loaded from checkpoint for trial 71, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 46.58178281188624

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.049. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 71, fold 2


[I 2025-06-15 16:39:55,951] Trial 71 finished with value: 67.26733212563587 and parameters: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 120, 'lr': 0.0001557223534607635, 'weight_decay': 1.4511951218892298e-05, 'nhead': 8, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 54.78926418936627
Média dos SMAPE nos folds: 67.26733212563587
Results of trial 71 saved in optuna_iteration_metrics/trial_71.json
Current value: 67.26733212563587, Current params: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 120, 'lr': 0.0001557223534607635, 'weight_decay': 1.4511951218892298e-05, 'nhead': 8, 'd_model': 216, 'num_encoder_layers': 5, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.087
Metric val_loss improved by 0.029 >= min_delta = 0.0005. New best score: 12.057
Metric val_loss improved by 0.053 >= min_delta = 0.0005. New best score: 12.004
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 11.991
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 11.983
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 11.978
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 11.974
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 11.968
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 11.963
Metric val_loss improved by 0.015 >= min_delta = 0.0005. New best score: 11.948
`Trainer.fit` stopped: `max_epochs=30` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 72, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 101.23058096779744

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.828 >= min_delta = 0.0005. New best score: 0.121
Metric val_loss improved by 0.014 >= min_delta = 0.0005. New best score: 0.107
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.106
Metric val_loss improved by 0.013 >= min_delta = 0.0005. New best score: 0.093
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 0.082
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 0.071
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 0.060
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 0.052
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 0.051
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 0.049
Metric val_loss improved by 0.001 >= min_delta = 0.

Model loaded from checkpoint for trial 72, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 47.30086965028964

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.048. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 72, fold 2


[I 2025-06-15 16:52:00,869] Trial 72 finished with value: 68.05425102486997 and parameters: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 120, 'lr': 0.00013969075237271902, 'weight_decay': 2.9531953937327187e-05, 'nhead': 8, 'd_model': 216, 'num_encoder_layers': 6, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 55.63130245652284
Média dos SMAPE nos folds: 68.05425102486997
Results of trial 72 saved in optuna_iteration_metrics/trial_72.json
Current value: 68.05425102486997, Current params: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 120, 'lr': 0.00013969075237271902, 'weight_decay': 2.9531953937327187e-05, 'nhead': 8, 'd_model': 216, 'num_encoder_layers': 6, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


Metric val_loss improved. New best score: 12.084
Metric val_loss improved by 0.028 >= min_delta = 0.0005. New best score: 12.056
Metric val_loss improved by 0.031 >= min_delta = 0.0005. New best score: 12.025
Metric val_loss improved by 0.011 >= min_delta = 0.0005. New best score: 12.014
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.011
Metric val_loss improved by 0.003 >= min_delta = 0.0005. New best score: 12.007
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 11.999
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 11.997
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 11.989
Metric val_loss improved by 0.002 >= min_delta = 0.0005. New best score: 11.987
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 11.975
`Trainer.fit` stopped: `max_epochs=30` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, u

Model loaded from checkpoint for trial 73, fold 0


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 0: 101.56142062936621

Fold 2/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Metric val_loss improved by 11.851 >= min_delta = 0.0005. New best score: 0.125
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 0.119
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 0.110
Metric val_loss improved by 0.007 >= min_delta = 0.0005. New best score: 0.103
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 0.096
Metric val_loss improved by 0.016 >= min_delta = 0.0005. New best score: 0.080
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.076
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.071
Metric val_loss improved by 0.005 >= min_delta = 0.0005. New best score: 0.065
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 0.062
Metric val_loss improved by 0.005 >= min_delta = 0.

Model loaded from checkpoint for trial 73, fold 1


Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.


SMAPE fold 1: 45.96251355920105

Fold 3/3


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Monitored metric val_loss did not improve in the last 15 records. Best score: 0.052. Signaling Trainer to stop.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Model loaded from checkpoint for trial 73, fold 2


[I 2025-06-15 17:04:38,788] Trial 73 finished with value: 68.6430062232998 and parameters: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 120, 'lr': 0.0001048743847724991, 'weight_decay': 2.9856891086713972e-05, 'nhead': 8, 'd_model': 232, 'num_encoder_layers': 6, 'num_decoder_layers': 5, 'activation': 'GELU'}. Best is trial 2 with value: 62.29155632988628.
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


SMAPE fold 2: 58.40508448133215
Média dos SMAPE nos folds: 68.6430062232998
Results of trial 73 saved in optuna_iteration_metrics/trial_73.json
Current value: 68.6430062232998, Current params: {'input_chunk_length': 24, 'dropout': 0.15000000000000002, 'batch_size': 120, 'lr': 0.0001048743847724991, 'weight_decay': 2.9856891086713972e-05, 'nhead': 8, 'd_model': 232, 'num_encoder_layers': 6, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}

Fold 1/3


[I 2025-06-15 17:04:51,001] Trial 74 pruned. Trial was pruned at epoch 0.
[I 2025-06-15 17:04:51,028] Trial 75 pruned. d_model (232) must be divisible by nhead (7)
Specified future encoders in `add_encoders` at model creation but model does not accept future covariates. future encoders will be ignored.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Current value: 12.196683422671029, Current params: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 136, 'lr': 3.5573742598032355e-05, 'weight_decay': 6.042903920076551e-05, 'nhead': 8, 'd_model': 216, 'num_encoder_layers': 6, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}
Current value: None, Current params: {'input_chunk_length': 24, 'dropout': 0.1, 'batch_size': 128, 'lr': 6.048148115509383e-05, 'weight_decay': 1.4264154570853813e-05, 'nhead': 7, 'd_model': 232}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers

Metric val_loss improved. New best score: 12.154
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.143
Metric val_loss improved by 0.019 >= min_delta = 0.0005. New best score: 12.124
Metric val_loss improved by 0.008 >= min_delta = 0.0005. New best score: 12.116
Metric val_loss improved by 0.006 >= min_delta = 0.0005. New best score: 12.110
Metric val_loss improved by 0.022 >= min_delta = 0.0005. New best score: 12.088
Metric val_loss improved by 0.009 >= min_delta = 0.0005. New best score: 12.079
Metric val_loss improved by 0.012 >= min_delta = 0.0005. New best score: 12.067
Metric val_loss improved by 0.004 >= min_delta = 0.0005. New best score: 12.063
Metric val_loss improved by 0.001 >= min_delta = 0.0005. New best score: 12.062
Metric val_loss improved by 0.010 >= min_delta = 0.0005. New best score: 12.053
[I 2025-06-15 17:10:30,578] Trial 76 pruned. Trial was pruned at epoch 25.


Current value: 12.063115181240581, Current params: {'input_chunk_length': 36, 'dropout': 0.15000000000000002, 'batch_size': 112, 'lr': 0.00014087792115874672, 'weight_decay': 1.0475115870362078e-05, 'nhead': 8, 'd_model': 208, 'num_encoder_layers': 6, 'num_decoder_layers': 5, 'activation': 'GELU'}
Best value: 62.29155632988628, Best params: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}


In [9]:
print(f"Best SMAPE Value (Minimum): {study.best_value}")
print(f"Best Parameters: {study.best_params}")

Best SMAPE Value (Minimum): 62.29155632988628
Best Parameters: {'input_chunk_length': 72, 'dropout': 0.4, 'batch_size': 112, 'lr': 3.371095418689331e-05, 'weight_decay': 1.302793079680627e-06, 'nhead': 8, 'd_model': 240, 'num_encoder_layers': 1, 'num_decoder_layers': 3, 'activation': 'SwiGLU'}


In [10]:
best_dict = {
    "best_value": study.best_value,
    "best_params": study.best_params,
}
json_path = os.path.join(results_output_dir, "best_trial.json")
with open(json_path, 'w') as f:
    json.dump(best_dict, f, indent=4)
print(f"Best results saved in {json_path}")

Best results saved in optuna_iteration_metrics/best_trial.json
