In [None]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import Trainer
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import wandb
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from config import num_epochs, learning_rate, wandb_config, model
from cyclical_preprocessing import train_loader, val_loader, test_loader, label_scaler

class StockPredictionModule(pl.LightningModule):
    def __init__(self, model, label_scaler, train_loader, val_loader, test_loader):
        super().__init__()
        self.model = model
        self.label_scaler = label_scaler
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = nn.MSELoss()

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("val_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("test_loss", loss)
        return loss

    def on_test_epoch_end(self):
        predictions, actuals = [], []
        for seqs, labels in self.test_loader:
            seqs, labels = seqs.to(self.device), labels.to(self.device)
            output = self(seqs)
            predictions.extend(output.view(-1).detach().cpu().numpy())
            actuals.extend(labels.view(-1).detach().cpu().numpy())

        predictions_rescaled = list(self.label_scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten())
        actuals_rescaled = list(self.label_scaler.inverse_transform(np.array(actuals).reshape(-1, 1)).flatten())
        baseline_rescaled = [actuals_rescaled[0]] + actuals_rescaled[:-1]
        baseline_constant = [0.0] * len(predictions_rescaled)

        len_test_set = len(predictions)

        fig, ax = plt.subplots(figsize=(15, 7))
        ax.plot(actuals_rescaled[-100:], label='Actual log returns', color='black', linestyle='-')
        ax.plot(predictions_rescaled[-100:], label='Predicted log returns', color='green', linestyle='-')
        # ax.plot(baseline_rescaled[-100:], label='Baseline_1', color='darkblue', linestyle='-')
        # ax.plot(baseline_constant[-100:], label='Baseline_2', color='steelblue', linestyle='-')
        # ax.plot(test_dates[-100:], arima_predictions[-100:], label='Baseline', color='orange', linestyle='-') 
        ax.set_title('Log returns prediction')
        ax.set_xlabel('Date')
        ax.set_ylabel('Log returns')
        ax.legend()
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
        plt.tight_layout()
        plt.show()
        filename = "plot.png"
        fig.savefig(filename)
        wandb.log({"Relative Difference Prediction": wandb.Image(filename)})
        os.remove(filename)
        plt.close(fig)

        actuals_rescaled = np.array(actuals_rescaled)
        predictions_rescaled = np.array(predictions_rescaled)
        baseline_rescaled = np.array(baseline_rescaled)

        model_mse = mean_squared_error(actuals_rescaled, predictions_rescaled)
        model_rmse = np.sqrt(model_mse)
        model_mae = mean_absolute_error(actuals_rescaled, predictions_rescaled)
        model_r2 = r2_score(actuals_rescaled, predictions_rescaled)
        model_mape = np.mean(np.abs((actuals_rescaled - predictions_rescaled) / (actuals_rescaled + 1e-8)))
        pct_change_labels = [label for label in actuals_rescaled]
        pct_change_predictions = [pred for pred in predictions_rescaled]
        hit_rate_model = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_predictions))
        
        baseline_mse = mean_squared_error(actuals_rescaled, baseline_rescaled)
        baseline_rmse = np.sqrt(baseline_mse)
        baseline_mae = mean_absolute_error(actuals_rescaled, baseline_rescaled)
        baseline_r2 = r2_score(actuals_rescaled, baseline_rescaled)
        baseline_mape = np.mean(np.abs((actuals_rescaled - baseline_rescaled) / (actuals_rescaled + 1e-8)))
        pct_change_baseline = [base for base in baseline_rescaled]
        hit_rate_baseline = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_baseline))

        model_metrics = {
            "mse": model_mse,
            "rmse": model_rmse,
            "mae": model_mae,
            "mape": model_mape,
            "r2": model_r2,
            "hit_rate": hit_rate_model,
        }
        baseline_metrics = {
            "mse": baseline_mse,
            "rmse": baseline_rmse,
            "mae": baseline_mae,
            "mape": baseline_mape,
            "r2": baseline_r2,
            "hit_rate": hit_rate_baseline
        }
        model_baseline_performance_metrics = {
            "mse": round((baseline_mse / model_mse - 1) * 100, 2),
            "rmse": round((baseline_rmse / model_rmse - 1) * 100, 2),
            "mae": round((baseline_mae / model_mae - 1) * 100, 2),
            "mape": round((baseline_mape / model_mape - 1) * 100, 2),
            "r2": round((model_r2 / baseline_r2 - 1) * 100, 2),
            "hit_rate": round((hit_rate_model / hit_rate_baseline - 1) * 100, 2),
        }

        print("Preparing to log the table...")
        metrics_table = wandb.Table(columns=["metric", "model", "baseline", "model-baseline performance comparison [%]"])
        for metric in model_metrics.keys():
            metrics_table.add_data(metric, model_metrics[metric], baseline_metrics[metric], model_baseline_performance_metrics[metric])
        wandb.log({"metrics": metrics_table})

def main():
    seed_value = 42
    torch.manual_seed(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_value)

    torch.set_float32_matmul_precision("medium")
    wandb_logger = WandbLogger(project="cyclical_data_forecast", log_model="all", config=wandb_config)
    
    module = StockPredictionModule(model=model, label_scaler=label_scaler, train_loader=train_loader, val_loader=val_loader, test_loader=test_loader)

    if torch.cuda.is_available(): 
        accelerator = "gpu"
        devices = 1
    elif torch.backends.mps.is_built():
        accelerator = "mps"
        devices = 1
    else:
        accelerator = None
        devices = None

    trainer = Trainer(max_epochs=num_epochs, logger=wandb_logger, accelerator=accelerator, devices=devices, enable_checkpointing=True)
    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    trainer.test(dataloaders=test_loader, ckpt_path="best")

    wandb.finish()

if __name__ == "__main__":
    main()



---
### Hyperparameter Optimization (Objective: Minimize validation loss)

In [1]:
import torch
import optuna
import numpy as np
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from models import TransformerModel
from config import num_features, model_config, device, seq_length, architecture, wandb_config, num_epochs
from cyclical_preprocessing import train_loader, val_loader, test_loader, label_scaler
import wandb

class StockPredictionModule(LightningModule):
    def __init__(self, model, label_scaler, train_loader, val_loader, test_loader):
        super().__init__()
        self.model = model
        self.label_scaler = label_scaler
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = torch.nn.MSELoss()

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        labels = labels.detach().cpu().numpy()
        y_pred = y_pred.detach().cpu().numpy()

        labels_rescaled = self.label_scaler.inverse_transform(labels.reshape(-1, 1)).flatten()
        predictions_rescaled = self.label_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

        r2 = r2_score(labels_rescaled , predictions_rescaled)
        mse = mean_squared_error(labels_rescaled , predictions_rescaled)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(labels_rescaled , predictions_rescaled)
        mape = np.mean(np.abs((labels_rescaled  - predictions_rescaled) / (predictions_rescaled + 1e-8)))
        pct_change_labels = [label for label in labels_rescaled]
        pct_change_predictions = [prediction for prediction in predictions_rescaled]
        hit_rate = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_predictions))

        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_r2", r2, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mse", mse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_rmse", rmse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mae", mae, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mape", mape, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("hit_rate", hit_rate, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {"val_loss": loss, "val_r2": r2, "val_mse": mse, "val_rmse": rmse, "val_mae": mae, "val_mape": mape, "hit_rate": hit_rate}


def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    num_layers = trial.suggest_int('num_layers', 1, 3)
    num_heads = trial.suggest_categorical('num_heads', [2, 4, 8])
    hidden_size_multipliers = trial.suggest_int('hidden_size_multipliers', 1, 16)
    hidden_size = num_heads * hidden_size_multipliers
    dropout_prob = trial.suggest_float('dropout_prob', 0.0, 0.5)

    if hidden_size % num_heads != 0:
        raise optuna.exceptions.TrialPruned("hidden_size is not divisible by num_heads")

    model_config.update({
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "dropout": dropout_prob
    })

    wandb_config = {
        "architecture": architecture,
        "learning_rate": learning_rate,
        "num_units": hidden_size,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "dropout": dropout_prob,
        "seq_length": seq_length,
        "epochs": num_epochs
    }

    wandb.init(project="cyclical_hyperparameter_tuning_transformer", entity="frederik135", config=wandb_config, reinit=True)
    model = TransformerModel(**model_config).to(device)
    module = StockPredictionModule(model=model, label_scaler=label_scaler,
                                   train_loader=train_loader, val_loader=val_loader, test_loader=None)
    module.hparams.learning_rate = learning_rate

    if torch.cuda.is_available(): 
        accelerator = "gpu"
        devices = 1
    elif torch.backends.mps.is_built():
        accelerator = "mps"
        devices = 1
    else:
        accelerator = None
        devices = None

    wandb_logger = WandbLogger(project="cyclical_hyperparameter_tuning_transformer", log_model="all", config=wandb_config)
    trainer = Trainer(
        logger=wandb_logger,
        max_epochs=70,
        callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=10)],
        accelerator=accelerator,
        devices=devices,
        enable_checkpointing=False,
        enable_progress_bar=False
    )

    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    val_result = trainer.validate(module, dataloaders=val_loader, verbose=False)
    val_loss = val_result[0].get('val_loss', float('inf'))
    wandb.finish()
    return val_loss


seed_value = 42
torch.manual_seed(seed_value)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_value)
torch.set_float32_matmul_precision("medium")

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

print("Best hyperparameters: ", study.best_trial.params)

[I 2024-06-02 10:42:53,566] A new study created in memory with name: no-name-b84a5f77-ef01-4a2f-8049-b652ca88aa5f
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfrederik135[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 87.0 K
1 | criterion | MSELoss          | 0     
-----------------------------------------------
87.0 K    Trainable params
0         Non-trainable params
87.0 K    Total params
0.348     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=70` reached.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
hit_rate,▁▁▂▂▃▃▃▁▂▃▂▂▃▃▂▂▄▄▄▃▃▄▅▆▅▅▆▇▇▆▆▇▇████▇██
train_loss_epoch,█▅▆▃▂▄▅▆▅▄▃▅▃▄▃▃▂▅▅▂▃▄▅▃▅▅▂▁▂▁▂▁▂▂▁▃▂▂▂▂
train_loss_step,█▅▆▅▅▃▃▄▂▄▃▄▃▄▅▂▇▂▅▃▃▃▁▄▃▄▂▁▃▃▂▃▅▂▁▁▂▂▃▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
val_loss,█▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁
val_mae,█▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▁▁▂▂▁▂▁▁▁▁▂▁▁
val_mape,▁▁▁▁▂▁▁▁▁▁▃▂▁▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁
val_mse,█▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁
val_r2,▁▆▇▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇▇█▇████▇██

0,1
epoch,70.0
hit_rate,0.67521
train_loss_epoch,0.00025
train_loss_step,0.01604
trainer/global_step,8750.0
val_loss,0.01979
val_mae,0.03979
val_mape,6.93717
val_mse,0.00339
val_r2,0.12755


[I 2024-06-02 10:45:28,385] Trial 0 finished with value: 0.01979263871908188 and parameters: {'learning_rate': 2.3206582629665102e-05, 'num_layers': 2, 'num_heads': 2, 'hidden_size_multipliers': 5, 'dropout_prob': 0.2940441141780067}. Best is trial 0 with value: 0.01979263871908188.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011162767133334405, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 386 K 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
386 K     Trainable params
0         Non-trainable params
386 K     Total params
1.546     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.013 MB uploaded\r'), FloatProgress(value=0.08999343113641341, max=1.…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
hit_rate,▁▄▅▅▅▆▅▇▇▆▇▆▆▆▆▆▅▇█▅▅▆▆▆█▇▇█▆▆▇▇▆▇█▇▆█▇▆
train_loss_epoch,▂▃▆█▃▃▇▂▅▃▆▄▁█▄▅▄▅▃▄▅▃▃▂▄▁▄▃▃▁▃▄▁▃▄▅▃▄▄▂
train_loss_step,▂▇▆▅▂▆▃▃▅▄▆▇▆▂▅█▄▅▃▄▅▂▃▄▄▁▁▃▅▆▂▄▁▅▅▄▅▂▃▃
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,█▅▄▅▃▃▃▃▂▂▃▂▂▂▂▂▂▂▁▃▂▂▂▂▂▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁
val_mae,█▄▄▅▃▃▃▃▂▂▃▂▂▂▁▂▃▁▂▃▂▁▂▂▁▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁
val_mape,▃▂▂▂▄▁▅▂▁▂▄▂▁▆▂▂▂▂▅█▆▁▂▄▂▂▄▃▅▂▁▁▁▄▂▁▂▄▁▁
val_mse,█▅▄▅▃▃▃▃▂▂▃▂▂▂▂▂▂▂▁▃▂▂▂▂▂▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁
val_r2,▁▅▆▄▇▇▇▇▇▇▆█▇███▇█▇▆▇█▇▇███▇███████▇████

0,1
epoch,69.0
hit_rate,0.68483
train_loss_epoch,0.00023
train_loss_step,0.02092
trainer/global_step,8625.0
val_loss,0.01836
val_mae,0.03779
val_mape,4.04315
val_mse,0.00314
val_r2,0.22206


[I 2024-06-02 10:49:31,429] Trial 1 finished with value: 0.01836070790886879 and parameters: {'learning_rate': 2.1131427004628303e-05, 'num_layers': 3, 'num_heads': 2, 'hidden_size_multipliers': 15, 'dropout_prob': 0.04102967505433758}. Best is trial 1 with value: 0.01836070790886879.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011169037033331128, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 55.6 K
1 | criterion | MSELoss          | 0     
-----------------------------------------------
55.6 K    Trainable params
0         Non-trainable params
55.6 K    Total params
0.222     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
hit_rate,▆▃▁▂▅▅▄▅█▅▇▃▄▆▄▄▂▅▂▂
train_loss_epoch,▁▄▄▁▂▆▄▃▂▄▁▃▄▃▂▄█▂▆
train_loss_step,█▇▇▃▄▄▄▄▂▇▃▃▅▂▃▃▂▄▃▅▄▄▁▃▃▃▂▂▄▃▄▄▄▄▄▃▃▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁
val_mape,▁▁▁▁▂▂▂▇▃▂▄▁▂▁▂▁█▁▁▁
val_mse,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_r2,▁▅▆▇████████████████

0,1
epoch,19.0
hit_rate,0.53953
train_loss_epoch,0.00042
train_loss_step,0.01506
trainer/global_step,2375.0
val_loss,0.02422
val_mae,0.04489
val_mape,8.92362
val_mse,0.00414
val_r2,-0.10226


[I 2024-06-02 10:50:39,748] Trial 2 finished with value: 0.02421773597598076 and parameters: {'learning_rate': 2.0153782762766187e-05, 'num_layers': 3, 'num_heads': 2, 'hidden_size_multipliers': 2, 'dropout_prob': 0.32546031536770675}. Best is trial 1 with value: 0.01836070790886879.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011146500466667122, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 843 K 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
843 K     Trainable params
0         Non-trainable params
843 K     Total params
3.375     Total estimated model params size (MB)
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▃▃▃▃▃▆▆▆▆███
hit_rate,▁▆▆█
train_loss_epoch,█▅▁
train_loss_step,██▃▃▂▁▂▁▃
trainer/global_step,▁▂▂▂▃▄▄▄▄▅▆▇▇▇██
val_loss,█▅▄▁
val_mae,█▄▆▁
val_mape,▁█▂▃
val_mse,█▅▄▁
val_r2,▁▅▁█

0,1
epoch,3.0
hit_rate,0.66667
train_loss_epoch,0.00031
train_loss_step,0.02756
trainer/global_step,458.0
val_loss,0.02008
val_mae,0.03977
val_mape,9.85575
val_mse,0.00344
val_r2,0.14678


[I 2024-06-02 10:51:07,267] Trial 3 finished with value: 0.020084626972675323 and parameters: {'learning_rate': 0.0033723082125739724, 'num_layers': 3, 'num_heads': 8, 'hidden_size_multipliers': 8, 'dropout_prob': 0.09329579969309237}. Best is trial 1 with value: 0.01836070790886879.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011161284255556186, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 257 K 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
257 K     Trainable params
0         Non-trainable params
257 K     Total params
1.029     Total estimated model params size (MB)


In [None]:
print("Best hyperparameters: ", study.best_trial.params)