# Hyperparameter Tuning

Prep for Colab Load

In [None]:
from darts.models.forecasting.tft_model import TFTModel

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sun Feb 23 21:29:38 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [1]:
!pip install "darts[torch]"
!pip uninstall scikit-learn -y
!pip install --upgrade pip setuptools wheel
!pip install scikit-learn==1.2.2

!pip install optuna
!pip install "optuna-integration[pytorch_lightning]"

# Uninstall the broken Darts version
!pip uninstall darts -y

# Install the correct stable version (0.24.0)
!pip install darts==0.24.0

# Upgrade PyTorch Lightning to ensure compatibility
!pip install --upgrade lightning

!pip uninstall -y darts pytorch-lightning torch optuna
!pip install darts==0.24.0 "pytorch-lightning<2.0.0" torch optuna

# # Restart the kernel (Important for changes to take effect)
# import os
# os._exit(00)

Collecting darts[torch]
  Using cached darts-0.33.0-py3-none-any.whl.metadata (55 kB)
Using cached darts-0.33.0-py3-none-any.whl (972 kB)
Installing collected packages: darts
Successfully installed darts-0.33.0
Found existing installation: scikit-learn 1.2.2
Uninstalling scikit-learn-1.2.2:
  Successfully uninstalled scikit-learn-1.2.2
Collecting scikit-learn==1.2.2
  Using cached scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Using cached scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)
Installing collected packages: scikit-learn
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mlxtend 0.23.4 requires scikit-learn>=1.3.1, but you have scikit-learn 1.2.2 which is incompatible.
imbalanced-learn 0.13.0 requires scikit-learn<2,>=1.3.2, but you have scikit-learn 1.2.2 which is

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import warnings
import logging

# Visualization and numerical processing
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import yfinance as yf

# Statsmodels for time series analysis
from statsmodels.graphics.tsaplots import month_plot, quarter_plot, plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose

# Sklearn for metrics and parameter grid
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler

# Darts functions
from darts import TimeSeries, concatenate
from darts.dataprocessing.transformers import Scaler
from darts.models import TFTModel
from darts.utils.likelihood_models import QuantileRegression
from darts.utils.statistics import check_seasonality, plot_acf
from darts.utils.timeseries_generation import datetime_attribute_timeseries

# Disable warnings and logging
warnings.filterwarnings("ignore")
#logging.disable(logging.CRITICAL)

pd.set_option('display.max_rows',60000)
pd.set_option('display.max_columns',500)
pd.set_option('display.max_colwidth',200)
pd.options.display.float_format = '{:,.4f}'.format

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



If you are using a probabilistic model like Temporal Fusion Transformer (TFT) with a likelihood function (e.g., QuantileRegression), the training loss and validation loss might not behave in the same way as in deterministic models.

In [4]:
stock_data_ = pd.read_csv('/content/drive/MyDrive/projects_fp/dataset_250222_run.csv')
#stock_data_ = pd.read_csv('dataset_250222_run.csv')
stock_data_['date'] = pd.to_datetime(stock_data_['date'],utc=False)
stock_data_['date'] = stock_data_['date'].dt.tz_localize(None)
#stock_data_ = stock_data_[stock_data_['ticker_id'].isin(['NVDA','T'])].copy()  #uncheck this

for x in ['ticker_id','sector']: #'dividends'
    stock_data_[x+'_2'] = stock_data_[x].copy()

for x in ['ticker_id', 'sector']:
    stock_data_[x] = pd.factorize(stock_data_[x])[0]

stock_data = stock_data_.copy()

recon_mapper =  {a:b for a,b in set(zip(stock_data['ticker_id'],stock_data['ticker_id_2']))}

preferred_cols = ['volatility_atr_relative_pctc',
       'macd_sig_line_relative_close_pctc', 'macd_relative_close_pctc',
       'beta_close_relative_pctc', 'tr_relative_pctc', 'open_close_pctc',
       'volatility_bbm_relative', 'volatility_bbh_relative', 'momentum_rsi',
       'momentum_rsi_relative_pctc', 'pe_ratio_pctc', 'volatility_bb_width',
       'beta_vol_interaction_pctc', 'rolling_beta',
       'beta_close_pctc', 'beta_vol_interaction', 'vix', 'volume_pctc',
       'volatility_bb_width_pctc'] #'earnings_date_flag',

frequency = 'B'
fill_missing_dates_ = True

# Create TimeSeries objects grouped by ticker
series_ = TimeSeries.from_group_dataframe(
    df=stock_data,
    time_col='date',
    group_cols=['ticker_id'],      # Group by ticker
    value_cols= 'close_pctc',         # Use close price as the target
    freq=frequency,                  # Irregular trading days
    fill_missing_dates=fill_missing_dates_,   # No filling within Darts
    static_cols=['sector']  # Add sector as a static covariate
)

# Create dynamic covariates TimeSeries (volume) grouped by ticker
dynamic_covariates = TimeSeries.from_group_dataframe(
    df=stock_data,
    time_col='date',
    group_cols=['ticker_id'],      # Group by ticker
    value_cols=preferred_cols,   # Dynamic covariate (e.g., volume)'low' 'close_returns_lag_5' 'volume'
    freq=frequency,                   # Business day frequency
    fill_missing_dates=fill_missing_dates_     # Allow filling missing dates
)

# Create the future covariates time series instance from the full DataFrame using group-based columns
future_covariates = TimeSeries.from_group_dataframe(
    df=stock_data,           # Full DataFrame containing both current and future data
    time_col='date',    # Column representing the time dimension
    group_cols=['ticker_id'], # Column(s) representing the group or series identifier(s)
    value_cols=['earnings_date_flag'], # Columns representing the exogenous variables (future covariates)
    freq=frequency,                   # Business day frequency
    fill_missing_dates=fill_missing_dates_           # Frequency of the data
)

In [5]:
# Updated split_series function to handle multiple sequences
def split_series_collection(series_collection, train_ratio=0.6, val_ratio=0.2,type_ ='dynamic'):
    train_series = []
    val_series = []
    test_series = []
    combine_series = []

    for series in series_collection:
        # Apply the split logic to each individual sequence
        total_entries = len(series)
        train_size = int(total_entries * train_ratio)
        val_size = int(total_entries * val_ratio)
        test_size = total_entries - train_size - val_size

        train_series.append(series[:train_size])
        val_series.append(series[train_size:train_size + val_size])
        test_series.append(series[train_size + val_size:-40])# for static series only
        combine_series.append(series[:train_size+val_size]) #for forecast purpose

    return (train_series, val_series, test_series, combine_series)

def fit_scalers_on_series(series_collection):
    """
    Fits a separate StandardScaler for each TimeSeries in the collection.

    Args:
        series_collection (list[TimeSeries]): List of TimeSeries to fit scalers.

    Returns:
        list[StandardScaler]: List of fitted scalers, one for each TimeSeries.
    """
    scalers = []
    for ts in series_collection:
        scaler = StandardScaler()
        scaler.fit(ts.values())  # Fit scaler for each series independently
        scalers.append(scaler)
    return (scalers)

def scale_series_with_scalers(series_collection, scalers):
    """
    Scales each TimeSeries using its corresponding StandardScaler.

    Args:
        series_collection (list[TimeSeries]): List of TimeSeries to scale.
        scalers (list[StandardScaler]): List of fitted scalers, one for each TimeSeries.

    Returns:
        list[TimeSeries]: List of scaled TimeSeries.
    """
    scaled_series = []
    for ts, scaler in zip(series_collection, scalers):
        scaled_values = scaler.transform(ts.values())  # Transform each series
        scaled_series.append(TimeSeries.from_times_and_values(ts.time_index, scaled_values))
    return (scaled_series)


# Splitting datasets
train_series, val_series, test_series, combined_series = split_series_collection(series_)
train_dynamic_covariates, val_dynamic_covariates, test_dynamic_covariates, combined_dynamics = split_series_collection(dynamic_covariates)

# Fit scalers on training data
target_scalers = fit_scalers_on_series(train_series)
dynamic_scalers = fit_scalers_on_series(train_dynamic_covariates)

# Scale training, validation, and test sets
train_series_scaled = scale_series_with_scalers(train_series, target_scalers)
val_series_scaled = scale_series_with_scalers(val_series, target_scalers)
test_series_scaled = scale_series_with_scalers(test_series, target_scalers)
combined_series_scaled = scale_series_with_scalers(combined_series, target_scalers)

train_dynamic_covariates_scaled = scale_series_with_scalers(train_dynamic_covariates, dynamic_scalers)
val_dynamic_covariates_scaled = scale_series_with_scalers(val_dynamic_covariates, dynamic_scalers)
test_dynamic_covariates_scaled = scale_series_with_scalers(test_dynamic_covariates, dynamic_scalers)
combined_dynamic_covariates_scaled = scale_series_with_scalers(combined_dynamics, dynamic_scalers)

train_size = len(train_series[0])
val_size = len(val_series[0]) #434
test_size = len(test_series[0]) #435

In [6]:
#assuming no scaling cos 1-0. results may vary after run
def future_split_series(series_collection, train_size, val_size, test_size, output_chunk_length):
    """
    Splits future covariates into train, validation, and test splits,
    extending each split to include the forecast horizon (output_chunk_length).

    Args:
        series_collection (List[TimeSeries]): The collection of future covariate time series.
        train_size (int): Number of time steps in the training split.
        val_size (int): Number of time steps in the validation split.
        test_size (int): Number of time steps in the test split.
        output_chunk_length (int): Forecast horizon to ensure sufficient future covariates.

    Returns:
        Tuple[List[TimeSeries], List[TimeSeries], List[TimeSeries]]:
            - train_future_covariates
            - val_future_covariates
            - test_future_covariates
    """
    train_future_covariates = []
    val_future_covariates = []
    test_future_covariates = []

    for series in series_collection:
        total_length = len(series)

        # Train split
        train_end = train_size
        train_split = series[:train_end + output_chunk_length]  # Extend for forecast horizon
        train_future_covariates.append(train_split)

        # Validation split
        val_start = train_size
        val_end = train_size + val_size
        val_split = series[val_start:val_end + output_chunk_length]  # Extend for forecast horizon
        val_future_covariates.append(val_split)

        # Test split
        test_start = train_size + val_size
        #test_end = train_size + val_size #+ test_size #do not include test size
        test_split = series[test_start:]  # Extend for forecast horizon
        #test_split = series[test_start:test_end + output_chunk_length]
        test_future_covariates.append(test_split)

    return train_future_covariates, val_future_covariates, test_future_covariates

output_chunk_length_for_future = 40  # New Forecast horizon

# Split the future covariates
train_future_covariates, val_future_covariates, test_future_covariates = future_split_series(
    future_covariates, train_size, val_size, test_size, output_chunk_length_for_future
)

# Main Hyperparameter Run

In [7]:
from pytorch_lightning.callbacks import Callback
from torch.optim import Adam # Define the Adam optimizer class
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

from pytorch_lightning.loggers import TensorBoardLogger

import optuna
from optuna.integration import PyTorchLightningPruningCallback

from darts.models import TFTModel
from darts.utils.likelihood_models import QuantileRegression

In [8]:
# Create a TensorBoardLogger instance
tensorboard_logger = TensorBoardLogger(save_dir="logs", name="tft_optuna")

# Initialize global variable to track best validation loss
best_val_loss = float("inf")  # Initialize correctly
best_model_path = "/content/drive/MyDrive/best_tft_model_0224.pth"



def objective(trial):
    """
    Objective function for hyperparameter optimization using Optuna.
    Saves the best TFT model automatically.
    """
    global best_val_loss  # Track best loss across trials

    # default quantiles for QuantileRegression
    quantiles = [0.01, 0.05, 0.1, 0.15, 0.2, 0.25,
    0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85,
    0.9, 0.95, 0.99]

    # Define hyperparameters to optimize
    hidden_size = trial.suggest_categorical("hidden_size", [32, 64, 128])
    dropout = trial.suggest_float("dropout", 0.1, 0.4)
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-4)

    model = TFTModel(
        input_chunk_length=80,
        output_chunk_length=40,
        hidden_size=hidden_size,
        hidden_continuous_size=32,  # if you want to keep this fixed
        lstm_layers=2,
        num_attention_heads=2,
        full_attention=True,
        dropout=dropout,
        batch_size=128,
        n_epochs=15,  # fewer epochs for quicker trials
        add_relative_index=False,
        feed_forward="GatedResidualNetwork",
        likelihood=QuantileRegression(quantiles=quantiles),
        optimizer_kwargs={
            "lr": learning_rate,
            "weight_decay": weight_decay
        },
        pl_trainer_kwargs={
            "accelerator": "gpu",
            "devices": 1,
            "logger": tensorboard_logger,
            "callbacks": [
                EarlyStopping(monitor="val_loss", patience=5),
                LearningRateMonitor(logging_interval="epoch"),
            ],
            "enable_progress_bar": False
        }
    )

    # -----------------------
    # 3) Train the model
    # -----------------------
    model.fit(
        series=train_series_scaled,
        past_covariates=train_dynamic_covariates_scaled,
        future_covariates=train_future_covariates,
        val_series=val_series_scaled,
        val_past_covariates=val_dynamic_covariates_scaled,
        val_future_covariates=val_future_covariates,
        verbose=False
    )

    # Get validation loss
    val_loss = model.trainer.callback_metrics.get("val_loss", None)
    val_loss = val_loss.item() if val_loss else float("inf")

    # Log progress
    trial.report(val_loss, step=trial.number)
    print(f"[Trial {trial.number}] Hidden Size: {hidden_size}, Dropout: {dropout}, "
          f"LR: {learning_rate:.6f}, Weight Decay: {weight_decay:.6f}, Val Loss: {val_loss}")

    # **Auto-Save the Best Model**
    if val_loss < best_val_loss:  # Fix: Compare with `best_val_loss`
        print(f" New Best Model Found! Saving... (Trial {trial.number})")
        best_val_loss = val_loss  # Update best loss
        model.save(best_model_path)  # Use Darts' built-in model saving

    # Stop if Optuna wants to prune the trial
    if trial.should_prune():
        print(f"Pruned Trial {trial.number}")
        raise optuna.TrialPruned()

    return val_loss

In [9]:
# Run Optuna Study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

# Get the best parameters
best_params = study.best_params
print(f"Best Parameters Found: {best_params}")

[I 2025-02-23 21:36:25,325] A new study created in memory with name: no-name-316e50af-e38d-4c9e-9c4f-ad9fcf19ce04
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.


[Trial 0] Hidden Size: 32, Dropout: 0.16414002908792602, LR: 0.000002, Weight Decay: 0.000001, Val Loss: 5.424081606848145
 New Best Model Found! Saving... (Trial 0)


[I 2025-02-23 22:01:35,321] Trial 0 finished with value: 5.424081606848145 and parameters: {'hidden_size': 32, 'dropout': 0.16414002908792602, 'learning_rate': 1.9891940264520184e-06, 'weight_decay': 1.4870667797385547e-06}. Best is trial 0 with value: 5.424081606848145.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.


[Trial 1] Hidden Size: 32, Dropout: 0.1611653877386216, LR: 0.000004, Weight Decay: 0.000002, Val Loss: 5.043634703412391
 New Best Model Found! Saving... (Trial 1)


[I 2025-02-23 22:26:40,521] Trial 1 finished with value: 5.043634703412391 and parameters: {'hidden_size': 32, 'dropout': 0.1611653877386216, 'learning_rate': 4.0992752348504546e-06, 'weight_decay': 1.7054174426574235e-06}. Best is trial 1 with value: 5.043634703412391.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
[

[Trial 2] Hidden Size: 32, Dropout: 0.2133276417301861, LR: 0.000002, Weight Decay: 0.000001, Val Loss: 5.587689722571653


INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.


[Trial 3] Hidden Size: 64, Dropout: 0.3399821482352452, LR: 0.000019, Weight Decay: 0.000046, Val Loss: 4.673216222045207
 New Best Model Found! Saving... (Trial 3)


[I 2025-02-23 23:48:19,297] Trial 3 finished with value: 4.673216222045207 and parameters: {'hidden_size': 64, 'dropout': 0.3399821482352452, 'learning_rate': 1.9381729236688928e-05, 'weight_decay': 4.57663413771757e-05}. Best is trial 3 with value: 4.673216222045207.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
[I 

[Trial 4] Hidden Size: 64, Dropout: 0.28592099810888855, LR: 0.000002, Weight Decay: 0.000001, Val Loss: 4.993650293773998


INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
[I 2025-02-24 01:40:56,937] Trial 5 finished with value: 4.888769650132915 and parameters: {'hidden_size': 64, 'dropout': 0.33323494801587483, 'learning_rate': 2.430333164077342e-06, 'weight_decay': 5.882077057687968e-06}. Best is trial 3 with value: 4.673216222045207.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[Trial 5] Hidden Size: 64, Dropout: 0.33323494801587483, LR: 0.000002, Weight Decay: 0.000006, Val Loss: 4.888769650132915


INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
[I 2025-02-24 02:37:17,289] Trial 6 finished with value: 4.918668485705218 and parameters: {'hidden_size': 64, 'dropout': 0.3127772961777112, 'learning_rate': 2.378208122353408e-06, 'weight_decay': 4.3844965933077904e-05}. Best is trial 3 with value: 4.673216222045207.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[Trial 6] Hidden Size: 64, Dropout: 0.3127772961777112, LR: 0.000002, Weight Decay: 0.000044, Val Loss: 4.918668485705218


INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
[I 2025-02-24 03:33:42,663] Trial 7 finished with value: 4.726956573241703 and parameters: {'hidden_size': 64, 'dropout': 0.3936994433163903, 'learning_rate': 2.148988071179243e-05, 'weight_decay': 6.105383781039944e-05}. Best is trial 3 with value: 4.673216222045207.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[Trial 7] Hidden Size: 64, Dropout: 0.3936994433163903, LR: 0.000021, Weight Decay: 0.000061, Val Loss: 4.726956573241703


INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
[I 2025-02-24 04:30:04,475] Trial 8 finished with value: 4.888454802772899 and parameters: {'hidden_size': 64, 'dropout': 0.38842828627715953, 'learning_rate': 2.1372632913556693e-06, 'weight_decay': 2.3159972183439576e-05}. Best is trial 3 with value: 4.673216222045207.
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[Trial 8] Hidden Size: 64, Dropout: 0.38842828627715953, LR: 0.000002, Weight Decay: 0.000023, Val Loss: 4.888454802772899


INFO: `Trainer.fit` stopped: `max_epochs=15` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=15` reached.
[I 2025-02-24 05:54:03,031] Trial 9 finished with value: 4.673561156063448 and parameters: {'hidden_size': 128, 'dropout': 0.39921736032832755, 'learning_rate': 7.0615820084286286e-06, 'weight_decay': 4.0118698139560036e-05}. Best is trial 3 with value: 4.673216222045207.


[Trial 9] Hidden Size: 128, Dropout: 0.39921736032832755, LR: 0.000007, Weight Decay: 0.000040, Val Loss: 4.673561156063448
Best Parameters Found: {'hidden_size': 64, 'dropout': 0.3399821482352452, 'learning_rate': 1.9381729236688928e-05, 'weight_decay': 4.57663413771757e-05}


In [None]:
# Mount Google Drive
drive.mount('/content/drive')
# Convert Study Trials to DataFrame
df = study.trials_dataframe()
#  Save CSV to Google Drive
csv_path = "/content/drive/MyDrive/optuna_tft_results_0224.csv"
df.to_csv(csv_path, index=False)

print(f" Optuna trial results saved to: {csv_path}")

import json

# Get the best parameters
best_params = study.best_params
print(f"Best Parameters Found: {best_params}")

# Define path to save in Google Drive
params_path = "/content/drive/MyDrive/best_tft_params_0224.json"

# Save parameters as a JSON file
with open(params_path, "w") as f:
    json.dump(best_params, f, indent=4)

print(f"Best parameters saved at: {params_path}")

## STOP HERE ###

# = = =