In [2]:
import sys
import os

# Get the absolute path to the project directory
project_dir = os.path.abspath("..")

# Append the project directory to sys.path
if project_dir not in sys.path:
    sys.path.append(project_dir)
    
from src.predictionModule.LoadupSamples import LoadupSamples
from src.predictionModule.FilterSamples import FilterSamples

import numpy as np
import polars as pl
import datetime
import random
import matplotlib.pyplot as plt

import logging
formatted_date = datetime.datetime.now().strftime("%d%b%y_%H%M").lower()

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter(fmt="%(asctime)s - %(message)s")
handler.setFormatter(formatter)
if not logger.hasHandlers():
    logger.addHandler(handler)
else:
    logger.handlers[:] = [handler]

#Output File handler
formatted_str = f"notebook-lstm-optuna-{formatted_date}"
file_handler = logging.FileHandler(f"{formatted_str}.log", mode="w")
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# Usage
logger.info("This will print to the notebook's output cell")

2025-07-11 22:31:40,700 - This will print to the notebook's output cell


In [None]:
params = {
    "idxAfterPrediction": 5,
    'timesteps': 60,
    'target_option': 'last',
    
    "TreeTime_lstm_units": 32,
    "TreeTime_lstm_num_layers": 3,
    "TreeTime_lstm_dropout": 0.00001,
    "TreeTime_lstm_recurrent_dropout": 0.00001,
    "TreeTime_lstm_learning_rate": 0.001,
    "TreeTime_lstm_optimizer": "adam",
    "TreeTime_lstm_bidirectional": True,
    "TreeTime_lstm_batch_size": 2**12,
    "TreeTime_lstm_epochs": 20,
    "TreeTime_lstm_l1": 0.00001,
    "TreeTime_lstm_l2": 0.00001,
    "TreeTime_inter_dropout": 0.00001,
    "TreeTime_input_gaussian_noise": 0.00001,
    "TreeTime_lstm_conv1d": True,
    "TreeTime_lstm_conv1d_kernel_size": 3,
    "TreeTime_lstm_loss": "mse",
}

In [4]:
stock_group = "group_snp500_finanTo2011"

eval_dates = sorted([
    datetime.date(2025,  2,  1) - datetime.timedelta(days=i*60 + random.randint(-10,10)) 
    for i in range(1)
])

years_back = 9
start_Dates = [eval_date - datetime.timedelta(days=365 * years_back) for eval_date in eval_dates]
start_Dates = [datetime.date(year=start_date.year, month=1, day=1) for start_date in start_Dates]

In [5]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm, trange
import shap

class TreeTimeLSTM(nn.Module):
    def __init__(self, 
                 input_size,
                 lstm_units,
                 num_layers,
                 dropout,
                 recurrent_dropout,
                 bidirectional,
                 l1=0.0,
                 l2=0.0,
                 use_conv1d=False,
                 conv_kernel=3,
                 noise_std=0.0,
                 inter_dropout=0.0):
        super().__init__()
        self.use_conv1d = use_conv1d
        self.noise_std = noise_std
        self.inter_dropout = inter_dropout

        if use_conv1d:
            self.conv1d = nn.Conv1d(
                in_channels=input_size,
                out_channels=lstm_units,
                kernel_size=conv_kernel,
                padding=conv_kernel//2
            )
            input_size = lstm_units

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=lstm_units,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0.0,
            bidirectional=bidirectional,
            batch_first=True
        )
        self.dropout = nn.Dropout(inter_dropout) if inter_dropout > 0 else None
        self.output = nn.Linear(
            lstm_units * (2 if bidirectional else 1),
            1
        )
        self.l1 = l1
        self.l2 = l2

    def forward(self, x):
        if self.noise_std > 0:
            x = x + torch.randn_like(x) * self.noise_std
        if self.use_conv1d:
            x = x.transpose(1, 2)
            x = self.conv1d(x)
            x = x.transpose(1, 2)
        out, _ = self.lstm(x)
        out_last = out[:, -1, :]
        if self.dropout:
            out_last = self.dropout(out_last)
        return self.output(out_last)

# Loss functions
def quantile_loss(q):
    def loss_fn(y_pred, y_true):
        e = y_true - y_pred
        return torch.mean(torch.max(q * e, (q - 1) * e))
    return loss_fn

def r2_metric(y_pred, y_true):
    ss_res = torch.sum((y_true - y_pred) ** 2)
    ss_tot = torch.sum((y_true - torch.mean(y_true)) ** 2)
    return 1 - ss_res / (ss_tot + 1e-6)

def neg_r2_loss(y_pred, y_true):
    return -r2_metric(y_pred, y_true)


def run(params, train_Xtime, train_ytime, training_ratio=0.95, device='cpu'):
    # Hyperparameters
    lstm_units = params['TreeTime_lstm_units']
    num_layers = params['TreeTime_lstm_num_layers']
    dropout = params['TreeTime_lstm_dropout']
    recurrent_dropout = params['TreeTime_lstm_recurrent_dropout']
    learning_rate = params['TreeTime_lstm_learning_rate']
    optimizer_name = params['TreeTime_lstm_optimizer']
    bidirectional = params['TreeTime_lstm_bidirectional']
    batch_size = params['TreeTime_lstm_batch_size']
    epochs = params['TreeTime_lstm_epochs']
    loss_name = params['TreeTime_lstm_loss']
    l1 = params.get('TreeTime_lstm_l1', 0.0)
    l2 = params.get('TreeTime_lstm_l2', 0.0)
    inter_dropout = params.get('TreeTime_inter_dropout', 0.0)
    noise_std = params.get('TreeTime_input_gaussian_noise', 0.0)
    use_conv1d = params.get('TreeTime_lstm_conv1d', False)
    conv_kernel = params.get('TreeTime_lstm_conv1d_kernel_size', 3)

    # Data split
    n_total = train_Xtime.shape[0]
    split_at = int(n_total * training_ratio)
    X_train, y_train = train_Xtime[:split_at], train_ytime[:split_at]
    X_val, y_val = train_Xtime[split_at:], train_ytime[split_at:]

    train_ds = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32)
    )
    val_ds = TensorDataset(
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.float32)
    )
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=False)
    val_loader = DataLoader(val_ds, batch_size=batch_size)

    # Model
    model = TreeTimeLSTM(
        input_size=train_Xtime.shape[-1],
        lstm_units=lstm_units,
        num_layers=num_layers,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        bidirectional=bidirectional,
        l1=l1,
        l2=l2,
        use_conv1d=use_conv1d,
        conv_kernel=conv_kernel,
        noise_std=noise_std,
        inter_dropout=inter_dropout
    ).to(device)

    # Loss & optimizer
    if loss_name == 'mse':
        criterion = nn.MSELoss()
    elif loss_name == 'r2':
        criterion = lambda pred, true: neg_r2_loss(pred, true)
    else:
        q = int(loss_name.split('_')[1]) / 10.0
        criterion = quantile_loss(q)
    optimizer = optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=l2
    )
    if optimizer_name == 'rmsprop':
        optimizer = optim.RMSprop(
            model.parameters(), lr=learning_rate, weight_decay=l2
        )
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.5, patience=2
    )

    best_rmse, wait = float('inf'), 0
    start_time = time.time()

    for epoch in trange(epochs, desc='Epochs'):
        model.train()
        for X_batch, y_batch in tqdm(
            train_loader, desc='Training', leave=False
        ):
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            preds = model(X_batch).squeeze()
            loss = criterion(preds, y_batch)
            if l1 > 0:
                l1_penalty = sum(p.abs().sum() for p in model.parameters())
                loss += l1 * l1_penalty
            loss.backward()
            optimizer.step()
            if time.time() - start_time > 3600:
                break

        model.eval()
        val_rmses = []
        for X_batch, y_batch in tqdm(
            val_loader, desc='Validation', leave=False
        ):
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            preds = model(X_batch).squeeze()
            mse = nn.MSELoss()(preds, y_batch)
            val_rmses.append(torch.sqrt(mse).item())
        val_rmse = sum(val_rmses) / len(val_rmses)
        scheduler.step(val_rmse)

        if val_rmse < best_rmse:
            best_rmse, wait = val_rmse, 0
            best_state = model.state_dict()
        else:
            wait += 1
            if wait >= 3:
                break
        if time.time() - start_time > 3600:
            break

    model.load_state_dict(best_state)
    return best_rmse, model

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def shorttest(model, train_Xtime, train_ytree, train_ytime, device='cpu'):
    # Convert to torch tensor and send to device
    n = 20000
    X_tensor = torch.tensor(train_Xtime[-n:], dtype=torch.float32).to(device)

    # Put model into eval mode and disable grad
    model.eval()
    with torch.no_grad():
        preds = model(X_tensor)        # (N, 1) tensor
        preds = preds.squeeze(-1)      # (N,) tensor

    # Bring back to CPU NumPy array if you like
    preds = preds.cpu().numpy()

    true_val = train_ytree[-n:]

    rsme_err = np.sqrt(np.mean((preds - train_ytime[-n:])**2))
    q = 0.99
    mask_pred_above = preds >= np.quantile(preds, q)
    mask_pred_below = preds <= np.quantile(preds, 1-q)
    logger.info(f"  Mean error: {rsme_err:.4f}")
    logger.info(f"  Mean all prediction: {np.mean(true_val):.4f}")
    logger.info(f"  Mean above prediction: {np.mean(true_val[mask_pred_above]):.4f}")
    logger.info(f"  Mean below prediction: {np.mean(true_val[mask_pred_below]):.4f}")
    logger.info(f"  True values above zero: {np.sum(mask_pred_above)/len(mask_pred_above):.4f}")
    logger.info(f"  True values below zero: {np.sum(true_val[mask_pred_below])/len(mask_pred_below):.4f}")
    
    return np.mean(true_val[mask_pred_above]) + np.mean(true_val[mask_pred_below])

In [7]:
import optuna
split_f = 0.90  # Split fraction for train/test sets
def objective(trial: optuna.Trial) -> float:
    # Suggest hyperparameters
    opt_params = {
        "idxAfterPrediction": trial.suggest_int("idxAfterPrediction", 1, 5),
        "LoadupSamples_time_scaling_stretch": trial.suggest_categorical("time_scaling_stretch", [True, False]),
        "LoadupSamples_time_inc_factor": trial.suggest_int("time_inc_factor", 1, 20),
    }
    doFeatureReduce = trial.suggest_categorical("doFeatureReduce", [True, False])

    # Build and evaluate FilterSamples for each instance
    scores_test = []
    for i, date in enumerate(eval_dates):
        ls = LoadupSamples(
            train_start_date=start_Dates[i],
            test_dates=[date],
            group=stock_group,
            params={**params, **opt_params}
        )
        try:
            ls.load_samples(main_path = "../src/featureAlchemy/bin/")
            train_Xtree = ls.train_Xtree
            train_ytree = ls.train_ytree
            train_Xtime = ls.train_Xtime
            train_ytime = ls.train_ytime

            timenames = ls.featureTimeNames
            
            if doFeatureReduce:
                idx1 = np.where(timenames == "MathFeature_TradedPrice")[0][0]
                idx2 = np.where(timenames == "FeatureTA_High")[0][0]
                idx3 = np.where(timenames == "FeatureTA_Low")[0][0]
                idx4 = np.where(timenames == "FeatureTA_volume_obv")[0][0]

                train_Xtime = train_Xtime[:, :, [idx1, idx2, idx3, idx4]]
            
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            val_rmse, model = run(params, train_Xtime, train_ytime, training_ratio=split_f, device=device)
            logger.info(f"Validation RMSE: {val_rmse:.4f}")
            a = shorttest(model, train_Xtime, train_ytree, train_ytime, device=device)
            
            score = 1.0 if a is None or np.isnan(a) else a
            
        except ValueError:
            score = 1.0
            
        scores_test.append(score)    
        
    logger.info(f"Trial number {trial.number}")
    logger.info(f"All scores (test) = {scores_test}") 
    
    fin_score = np.mean(np.log(scores_test))
    logger.info(f"Log mean of scores (test) {np.mean(np.log(scores_test))}")
    
    return float(fin_score)

optuna.logging.enable_propagation()
study = optuna.create_study(
    direction="maximize",
)
study.optimize(objective, timeout=60*60*1)

logger.info(f"Best parameters: {study.best_params}")
logger.info(f"Best score: {study.best_value}")

df = study.trials_dataframe()
logger.info("\nTrials DataFrame:")
logger.info(df.sort_values("value").to_string())

param_importances = optuna.importance.get_param_importances(study)
logger.info("Parameter Importances:")
for key, value in param_importances.items():
    logger.info(f"{key}: {value}")

[I 2025-07-11 22:31:41,226] A new study created in memory with name: no-name-be7baa61-ae93-43b1-90a3-0458a8bc4d22


2025-07-11 22:31:41,226 - A new study created in memory with name: no-name-be7baa61-ae93-43b1-90a3-0458a8bc4d22


Epochs:  20%|██        | 4/20 [00:42<02:51, 10.69s/it]


2025-07-11 22:33:55,937 - Validation RMSE: 0.2101
2025-07-11 22:33:56,175 -   Mean error: 0.2086
2025-07-11 22:33:56,175 -   Mean all prediction: 1.0021
2025-07-11 22:33:56,175 -   Mean above prediction: 1.0035
2025-07-11 22:33:56,175 -   Mean below prediction: 1.0059
2025-07-11 22:33:56,178 -   True values above zero: 0.0100
2025-07-11 22:33:56,178 -   True values below zero: 0.0101
2025-07-11 22:33:56,178 - Trial number 0
2025-07-11 22:33:56,178 - All scores (test) = [np.float32(2.009387)]
2025-07-11 22:33:56,180 - Log mean of scores (test) 0.6978297233581543


[I 2025-07-11 22:33:56,578] Trial 0 finished with value: 0.6978297233581543 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': False, 'time_inc_factor': 15, 'doFeatureReduce': False}. Best is trial 0 with value: 0.6978297233581543.


2025-07-11 22:33:56,578 - Trial 0 finished with value: 0.6978297233581543 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': False, 'time_inc_factor': 15, 'doFeatureReduce': False}. Best is trial 0 with value: 0.6978297233581543.


Epochs:  20%|██        | 4/20 [00:36<02:26,  9.17s/it]

2025-07-11 22:36:04,547 - Validation RMSE: 0.1644
2025-07-11 22:36:04,607 -   Mean error: 0.1629
2025-07-11 22:36:04,607 -   Mean all prediction: 1.0010
2025-07-11 22:36:04,607 -   Mean above prediction: 1.0035
2025-07-11 22:36:04,612 -   Mean below prediction: 1.0025
2025-07-11 22:36:04,612 -   True values above zero: 0.0100
2025-07-11 22:36:04,612 -   True values below zero: 0.0100
2025-07-11 22:36:04,612 - Trial number 1
2025-07-11 22:36:04,612 - All scores (test) = [np.float32(2.0059543)]
2025-07-11 22:36:04,615 - Log mean of scores (test) 0.6961199045181274



[I 2025-07-11 22:36:05,099] Trial 1 finished with value: 0.6961199045181274 and parameters: {'idxAfterPrediction': 2, 'time_scaling_stretch': False, 'time_inc_factor': 15, 'doFeatureReduce': True}. Best is trial 0 with value: 0.6978297233581543.


2025-07-11 22:36:05,099 - Trial 1 finished with value: 0.6961199045181274 and parameters: {'idxAfterPrediction': 2, 'time_scaling_stretch': False, 'time_inc_factor': 15, 'doFeatureReduce': True}. Best is trial 0 with value: 0.6978297233581543.


Epochs:  15%|█▌        | 3/20 [00:34<03:15, 11.49s/it]


2025-07-11 22:38:09,931 - Validation RMSE: 0.0551
2025-07-11 22:38:10,060 -   Mean error: 0.0551
2025-07-11 22:38:10,062 -   Mean all prediction: 1.0010
2025-07-11 22:38:10,062 -   Mean above prediction: 1.0021
2025-07-11 22:38:10,062 -   Mean below prediction: 1.0022
2025-07-11 22:38:10,064 -   True values above zero: 0.0100
2025-07-11 22:38:10,065 -   True values below zero: 0.0100
2025-07-11 22:38:10,065 - Trial number 2
2025-07-11 22:38:10,065 - All scores (test) = [np.float32(2.004335)]
2025-07-11 22:38:10,067 - Log mean of scores (test) 0.6953123211860657


[I 2025-07-11 22:38:10,480] Trial 2 finished with value: 0.6953123211860657 and parameters: {'idxAfterPrediction': 2, 'time_scaling_stretch': False, 'time_inc_factor': 4, 'doFeatureReduce': False}. Best is trial 0 with value: 0.6978297233581543.


2025-07-11 22:38:10,480 - Trial 2 finished with value: 0.6953123211860657 and parameters: {'idxAfterPrediction': 2, 'time_scaling_stretch': False, 'time_inc_factor': 4, 'doFeatureReduce': False}. Best is trial 0 with value: 0.6978297233581543.


Epochs: 100%|██████████| 20/20 [02:28<00:00,  7.42s/it]

2025-07-11 22:42:09,455 - Validation RMSE: 0.0646





2025-07-11 22:42:09,517 -   Mean error: 0.0647
2025-07-11 22:42:09,517 -   Mean all prediction: 1.0027
2025-07-11 22:42:09,519 -   Mean above prediction: 1.0049
2025-07-11 22:42:09,519 -   Mean below prediction: 1.0055
2025-07-11 22:42:09,520 -   True values above zero: 0.0100
2025-07-11 22:42:09,520 -   True values below zero: 0.0101
2025-07-11 22:42:09,520 - Trial number 3
2025-07-11 22:42:09,520 - All scores (test) = [np.float32(2.0103931)]
2025-07-11 22:42:09,522 - Log mean of scores (test) 0.698330283164978


[I 2025-07-11 22:42:09,989] Trial 3 finished with value: 0.698330283164978 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': False, 'time_inc_factor': 3, 'doFeatureReduce': True}. Best is trial 3 with value: 0.698330283164978.


2025-07-11 22:42:09,989 - Trial 3 finished with value: 0.698330283164978 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': False, 'time_inc_factor': 3, 'doFeatureReduce': True}. Best is trial 3 with value: 0.698330283164978.


Epochs: 100%|██████████| 20/20 [02:31<00:00,  7.57s/it]

2025-07-11 22:46:13,351 - Validation RMSE: 0.0551





2025-07-11 22:46:13,417 -   Mean error: 0.0549
2025-07-11 22:46:13,418 -   Mean all prediction: 1.0010
2025-07-11 22:46:13,418 -   Mean above prediction: 1.0021
2025-07-11 22:46:13,419 -   Mean below prediction: 1.0033
2025-07-11 22:46:13,420 -   True values above zero: 0.0100
2025-07-11 22:46:13,420 -   True values below zero: 0.0101
2025-07-11 22:46:13,420 - Trial number 4
2025-07-11 22:46:13,420 - All scores (test) = [np.float32(2.0053823)]
2025-07-11 22:46:13,422 - Log mean of scores (test) 0.6958346962928772


[I 2025-07-11 22:46:13,902] Trial 4 finished with value: 0.6958346962928772 and parameters: {'idxAfterPrediction': 2, 'time_scaling_stretch': False, 'time_inc_factor': 4, 'doFeatureReduce': True}. Best is trial 3 with value: 0.698330283164978.


2025-07-11 22:46:13,902 - Trial 4 finished with value: 0.6958346962928772 and parameters: {'idxAfterPrediction': 2, 'time_scaling_stretch': False, 'time_inc_factor': 4, 'doFeatureReduce': True}. Best is trial 3 with value: 0.698330283164978.


Epochs: 100%|██████████| 20/20 [02:32<00:00,  7.60s/it]

2025-07-11 22:50:27,992 - Validation RMSE: 0.1389





2025-07-11 22:50:28,056 -   Mean error: 0.1376
2025-07-11 22:50:28,057 -   Mean all prediction: 1.0005
2025-07-11 22:50:28,058 -   Mean above prediction: 1.0011
2025-07-11 22:50:28,059 -   Mean below prediction: 1.0037
2025-07-11 22:50:28,062 -   True values above zero: 0.0100
2025-07-11 22:50:28,063 -   True values below zero: 0.0101
2025-07-11 22:50:28,063 - Trial number 5
2025-07-11 22:50:28,064 - All scores (test) = [np.float32(2.0047863)]
2025-07-11 22:50:28,064 - Log mean of scores (test) 0.6955374479293823


[I 2025-07-11 22:50:28,506] Trial 5 finished with value: 0.6955374479293823 and parameters: {'idxAfterPrediction': 1, 'time_scaling_stretch': True, 'time_inc_factor': 17, 'doFeatureReduce': True}. Best is trial 3 with value: 0.698330283164978.


2025-07-11 22:50:28,506 - Trial 5 finished with value: 0.6955374479293823 and parameters: {'idxAfterPrediction': 1, 'time_scaling_stretch': True, 'time_inc_factor': 17, 'doFeatureReduce': True}. Best is trial 3 with value: 0.698330283164978.


Epochs: 100%|██████████| 20/20 [02:28<00:00,  7.42s/it]

2025-07-11 22:54:40,855 - Validation RMSE: 0.0444





2025-07-11 22:54:40,919 -   Mean error: 0.0445
2025-07-11 22:54:40,919 -   Mean all prediction: 1.0027
2025-07-11 22:54:40,921 -   Mean above prediction: 1.0048
2025-07-11 22:54:40,921 -   Mean below prediction: 1.0080
2025-07-11 22:54:40,921 -   True values above zero: 0.0100
2025-07-11 22:54:40,922 -   True values below zero: 0.0102
2025-07-11 22:54:40,922 - Trial number 6
2025-07-11 22:54:40,922 - All scores (test) = [np.float32(2.0128036)]
2025-07-11 22:54:40,922 - Log mean of scores (test) 0.6995285749435425


[I 2025-07-11 22:54:41,347] Trial 6 finished with value: 0.6995285749435425 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 2, 'doFeatureReduce': True}. Best is trial 6 with value: 0.6995285749435425.


2025-07-11 22:54:41,347 - Trial 6 finished with value: 0.6995285749435425 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 2, 'doFeatureReduce': True}. Best is trial 6 with value: 0.6995285749435425.


Epochs:  15%|█▌        | 3/20 [00:34<03:15, 11.49s/it]


2025-07-11 22:56:56,598 - Validation RMSE: 0.1631
2025-07-11 22:56:56,726 -   Mean error: 0.1625
2025-07-11 22:56:56,726 -   Mean all prediction: 1.0016
2025-07-11 22:56:56,726 -   Mean above prediction: 1.0004
2025-07-11 22:56:56,728 -   Mean below prediction: 1.0011
2025-07-11 22:56:56,728 -   True values above zero: 0.0100
2025-07-11 22:56:56,728 -   True values below zero: 0.0100
2025-07-11 22:56:56,728 - Trial number 7
2025-07-11 22:56:56,730 - All scores (test) = [np.float32(2.0014868)]
2025-07-11 22:56:56,730 - Log mean of scores (test) 0.6938902735710144


[I 2025-07-11 22:56:57,187] Trial 7 finished with value: 0.6938902735710144 and parameters: {'idxAfterPrediction': 3, 'time_scaling_stretch': True, 'time_inc_factor': 12, 'doFeatureReduce': False}. Best is trial 6 with value: 0.6995285749435425.


2025-07-11 22:56:57,187 - Trial 7 finished with value: 0.6938902735710144 and parameters: {'idxAfterPrediction': 3, 'time_scaling_stretch': True, 'time_inc_factor': 12, 'doFeatureReduce': False}. Best is trial 6 with value: 0.6995285749435425.


Epochs:  20%|██        | 4/20 [00:36<02:25,  9.08s/it]

2025-07-11 22:59:07,467 - Validation RMSE: 0.2573





2025-07-11 22:59:07,526 -   Mean error: 0.2549
2025-07-11 22:59:07,528 -   Mean all prediction: 1.0027
2025-07-11 22:59:07,528 -   Mean above prediction: 1.0057
2025-07-11 22:59:07,528 -   Mean below prediction: 1.0062
2025-07-11 22:59:07,528 -   True values above zero: 0.0100
2025-07-11 22:59:07,530 -   True values below zero: 0.0101
2025-07-11 22:59:07,530 - Trial number 8
2025-07-11 22:59:07,530 - All scores (test) = [np.float32(2.0119119)]
2025-07-11 22:59:07,530 - Log mean of scores (test) 0.6990854740142822


[I 2025-07-11 22:59:07,998] Trial 8 finished with value: 0.6990854740142822 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': False, 'time_inc_factor': 19, 'doFeatureReduce': True}. Best is trial 6 with value: 0.6995285749435425.


2025-07-11 22:59:07,998 - Trial 8 finished with value: 0.6990854740142822 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': False, 'time_inc_factor': 19, 'doFeatureReduce': True}. Best is trial 6 with value: 0.6995285749435425.


Epochs: 100%|██████████| 20/20 [02:25<00:00,  7.25s/it]

2025-07-11 23:03:17,531 - Validation RMSE: 0.1175





2025-07-11 23:03:17,593 -   Mean error: 0.1175
2025-07-11 23:03:17,594 -   Mean all prediction: 1.0027
2025-07-11 23:03:17,594 -   Mean above prediction: 1.0072
2025-07-11 23:03:17,595 -   Mean below prediction: 1.0115
2025-07-11 23:03:17,595 -   True values above zero: 0.0100
2025-07-11 23:03:17,597 -   True values below zero: 0.0102
2025-07-11 23:03:17,597 - Trial number 9
2025-07-11 23:03:17,598 - All scores (test) = [np.float32(2.01869)]
2025-07-11 23:03:17,598 - Log mean of scores (test) 0.702448844909668


[I 2025-07-11 23:03:18,070] Trial 9 finished with value: 0.702448844909668 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 6, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:03:18,070 - Trial 9 finished with value: 0.702448844909668 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 6, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs:  15%|█▌        | 3/20 [00:33<03:11, 11.24s/it]


2025-07-11 23:05:30,315 - Validation RMSE: 0.1352
2025-07-11 23:05:30,438 -   Mean error: 0.1352
2025-07-11 23:05:30,438 -   Mean all prediction: 1.0021
2025-07-11 23:05:30,438 -   Mean above prediction: 1.0040
2025-07-11 23:05:30,438 -   Mean below prediction: 1.0035
2025-07-11 23:05:30,438 -   True values above zero: 0.0100
2025-07-11 23:05:30,443 -   True values below zero: 0.0101
2025-07-11 23:05:30,443 - Trial number 10
2025-07-11 23:05:30,443 - All scores (test) = [np.float32(2.0075898)]
2025-07-11 23:05:30,443 - Log mean of scores (test) 0.6969348788261414


[I 2025-07-11 23:05:30,843] Trial 10 finished with value: 0.6969348788261414 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': False}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:05:30,843 - Trial 10 finished with value: 0.6969348788261414 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': False}. Best is trial 9 with value: 0.702448844909668.


Epochs:  20%|██        | 4/20 [00:36<02:24,  9.02s/it]

2025-07-11 23:07:46,543 - Validation RMSE: 0.1472
2025-07-11 23:07:46,603 -   Mean error: 0.1469
2025-07-11 23:07:46,604 -   Mean all prediction: 1.0027
2025-07-11 23:07:46,605 -   Mean above prediction: 1.0045
2025-07-11 23:07:46,605 -   Mean below prediction: 1.0103
2025-07-11 23:07:46,606 -   True values above zero: 0.0100
2025-07-11 23:07:46,606 -   True values below zero: 0.0102
2025-07-11 23:07:46,607 - Trial number 11
2025-07-11 23:07:46,608 - All scores (test) = [np.float32(2.0148005)]
2025-07-11 23:07:46,608 - Log mean of scores (test) 0.7005202174186707



[I 2025-07-11 23:07:47,118] Trial 11 finished with value: 0.7005202174186707 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:07:47,118 - Trial 11 finished with value: 0.7005202174186707 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs: 100%|██████████| 20/20 [02:25<00:00,  7.28s/it]

2025-07-11 23:11:56,877 - Validation RMSE: 0.1352
2025-07-11 23:11:56,941 -   Mean error: 0.1351
2025-07-11 23:11:56,942 -   Mean all prediction: 1.0021
2025-07-11 23:11:56,942 -   Mean above prediction: 1.0047
2025-07-11 23:11:56,943 -   Mean below prediction: 1.0065
2025-07-11 23:11:56,943 -   True values above zero: 0.0100
2025-07-11 23:11:56,944 -   True values below zero: 0.0101
2025-07-11 23:11:56,945 - Trial number 12
2025-07-11 23:11:56,945 - All scores (test) = [np.float32(2.0111732)]
2025-07-11 23:11:56,945 - Log mean of scores (test) 0.698718249797821



[I 2025-07-11 23:11:57,383] Trial 12 finished with value: 0.698718249797821 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:11:57,383 - Trial 12 finished with value: 0.698718249797821 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs:  90%|█████████ | 18/20 [02:17<00:15,  7.64s/it]

2025-07-11 23:15:57,964 - Validation RMSE: 0.1471
2025-07-11 23:15:58,028 -   Mean error: 0.1468
2025-07-11 23:15:58,028 -   Mean all prediction: 1.0027
2025-07-11 23:15:58,029 -   Mean above prediction: 1.0033
2025-07-11 23:15:58,029 -   Mean below prediction: 1.0068
2025-07-11 23:15:58,030 -   True values above zero: 0.0100
2025-07-11 23:15:58,030 -   True values below zero: 0.0101





2025-07-11 23:15:58,031 - Trial number 13
2025-07-11 23:15:58,032 - All scores (test) = [np.float32(2.0101764)]
2025-07-11 23:15:58,033 - Log mean of scores (test) 0.6982225179672241


[I 2025-07-11 23:15:58,454] Trial 13 finished with value: 0.6982225179672241 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:15:58,454 - Trial 13 finished with value: 0.6982225179672241 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 8, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs:  20%|██        | 4/20 [00:35<02:23,  9.00s/it]

2025-07-11 23:18:13,273 - Validation RMSE: 0.1711
2025-07-11 23:18:13,333 -   Mean error: 0.1704
2025-07-11 23:18:13,333 -   Mean all prediction: 1.0021
2025-07-11 23:18:13,334 -   Mean above prediction: 1.0016
2025-07-11 23:18:13,335 -   Mean below prediction: 1.0060
2025-07-11 23:18:13,335 -   True values above zero: 0.0100
2025-07-11 23:18:13,336 -   True values below zero: 0.0101
2025-07-11 23:18:13,336 - Trial number 14
2025-07-11 23:18:13,337 - All scores (test) = [np.float32(2.0076594)]
2025-07-11 23:18:13,338 - Log mean of scores (test) 0.6969695687294006



[I 2025-07-11 23:18:13,752] Trial 14 finished with value: 0.6969695687294006 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 11, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:18:13,752 - Trial 14 finished with value: 0.6969695687294006 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 11, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs:  70%|███████   | 14/20 [01:49<00:47,  7.85s/it]

2025-07-11 23:21:44,400 - Validation RMSE: 0.1078





2025-07-11 23:21:44,462 -   Mean error: 0.1077
2025-07-11 23:21:44,463 -   Mean all prediction: 1.0016
2025-07-11 23:21:44,464 -   Mean above prediction: 1.0040
2025-07-11 23:21:44,465 -   Mean below prediction: 1.0018
2025-07-11 23:21:44,465 -   True values above zero: 0.0100
2025-07-11 23:21:44,466 -   True values below zero: 0.0101
2025-07-11 23:21:44,466 - Trial number 15
2025-07-11 23:21:44,467 - All scores (test) = [np.float32(2.0058055)]
2025-07-11 23:21:44,467 - Log mean of scores (test) 0.6960456967353821


[I 2025-07-11 23:21:44,945] Trial 15 finished with value: 0.6960456967353821 and parameters: {'idxAfterPrediction': 3, 'time_scaling_stretch': True, 'time_inc_factor': 7, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:21:44,945 - Trial 15 finished with value: 0.6960456967353821 and parameters: {'idxAfterPrediction': 3, 'time_scaling_stretch': True, 'time_inc_factor': 7, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs: 100%|██████████| 20/20 [02:27<00:00,  7.36s/it]

2025-07-11 23:25:55,607 - Validation RMSE: 0.0811





2025-07-11 23:25:55,670 -   Mean error: 0.0810
2025-07-11 23:25:55,671 -   Mean all prediction: 1.0016
2025-07-11 23:25:55,672 -   Mean above prediction: 1.0024
2025-07-11 23:25:55,673 -   Mean below prediction: 1.0045
2025-07-11 23:25:55,673 -   True values above zero: 0.0101
2025-07-11 23:25:55,674 -   True values below zero: 0.0101
2025-07-11 23:25:55,675 - Trial number 16
2025-07-11 23:25:55,675 - All scores (test) = [np.float32(2.0068624)]
2025-07-11 23:25:55,677 - Log mean of scores (test) 0.696572482585907


[I 2025-07-11 23:25:56,202] Trial 16 finished with value: 0.696572482585907 and parameters: {'idxAfterPrediction': 3, 'time_scaling_stretch': True, 'time_inc_factor': 5, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:25:56,202 - Trial 16 finished with value: 0.696572482585907 and parameters: {'idxAfterPrediction': 3, 'time_scaling_stretch': True, 'time_inc_factor': 5, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs:  20%|██        | 4/20 [00:36<02:24,  9.00s/it]

2025-07-11 23:28:12,926 - Validation RMSE: 0.2063





2025-07-11 23:28:12,989 -   Mean error: 0.2050
2025-07-11 23:28:12,990 -   Mean all prediction: 1.0027
2025-07-11 23:28:12,990 -   Mean above prediction: 1.0044
2025-07-11 23:28:12,991 -   Mean below prediction: 1.0079
2025-07-11 23:28:12,991 -   True values above zero: 0.0100
2025-07-11 23:28:12,992 -   True values below zero: 0.0101
2025-07-11 23:28:12,993 - Trial number 17
2025-07-11 23:28:12,993 - All scores (test) = [np.float32(2.0123305)]
2025-07-11 23:28:12,994 - Log mean of scores (test) 0.6992934942245483


[I 2025-07-11 23:28:13,438] Trial 17 finished with value: 0.6992934942245483 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 13, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:28:13,438 - Trial 17 finished with value: 0.6992934942245483 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 13, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


Epochs:  15%|█▌        | 3/20 [00:33<03:10, 11.18s/it]


2025-07-11 23:30:26,789 - Validation RMSE: 0.0207
2025-07-11 23:30:26,939 -   Mean error: 0.0213
2025-07-11 23:30:26,940 -   Mean all prediction: 1.0021
2025-07-11 23:30:26,941 -   Mean above prediction: 1.0053
2025-07-11 23:30:26,943 -   Mean below prediction: 1.0025
2025-07-11 23:30:26,943 -   True values above zero: 0.0100
2025-07-11 23:30:26,943 -   True values below zero: 0.0100
2025-07-11 23:30:26,944 - Trial number 18
2025-07-11 23:30:26,945 - All scores (test) = [np.float32(2.007771)]
2025-07-11 23:30:26,946 - Log mean of scores (test) 0.6970251798629761


[I 2025-07-11 23:30:27,357] Trial 18 finished with value: 0.6970251798629761 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 1, 'doFeatureReduce': False}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:30:27,357 - Trial 18 finished with value: 0.6970251798629761 and parameters: {'idxAfterPrediction': 4, 'time_scaling_stretch': True, 'time_inc_factor': 1, 'doFeatureReduce': False}. Best is trial 9 with value: 0.702448844909668.


Epochs: 100%|██████████| 20/20 [02:24<00:00,  7.23s/it]

2025-07-11 23:34:30,835 - Validation RMSE: 0.1175





2025-07-11 23:34:30,896 -   Mean error: 0.1174
2025-07-11 23:34:30,897 -   Mean all prediction: 1.0027
2025-07-11 23:34:30,897 -   Mean above prediction: 1.0037
2025-07-11 23:34:30,898 -   Mean below prediction: 1.0084
2025-07-11 23:34:30,898 -   True values above zero: 0.0100
2025-07-11 23:34:30,899 -   True values below zero: 0.0102
2025-07-11 23:34:30,900 - Trial number 19
2025-07-11 23:34:30,900 - All scores (test) = [np.float32(2.0121422)]
2025-07-11 23:34:30,901 - Log mean of scores (test) 0.699199914932251


[I 2025-07-11 23:34:31,318] Trial 19 finished with value: 0.699199914932251 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 6, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.


2025-07-11 23:34:31,318 - Trial 19 finished with value: 0.699199914932251 and parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 6, 'doFeatureReduce': True}. Best is trial 9 with value: 0.702448844909668.
2025-07-11 23:34:31,318 - Best parameters: {'idxAfterPrediction': 5, 'time_scaling_stretch': True, 'time_inc_factor': 6, 'doFeatureReduce': True}
2025-07-11 23:34:31,318 - Best score: 0.702448844909668
2025-07-11 23:34:31,322 - 
Trials DataFrame:
2025-07-11 23:34:31,327 -     number     value             datetime_start          datetime_complete               duration  params_doFeatureReduce  params_idxAfterPrediction  params_time_inc_factor  params_time_scaling_stretch     state
7        7  0.693890 2025-07-11 22:54:41.355106 2025-07-11 22:56:57.187464 0 days 00:02:15.832358                   False                          3                      12                         True  COMPLETE
2        2  0.695312 2025-07-11 22:36:05.099387 2025-07-11 22:

In [8]:
df.to_parquet(f"{formatted_str}.parquet", index=False)