In [None]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import Trainer
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import wandb
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from config import num_epochs, learning_rate, wandb_config, model
# from preprocessing import stock_df
from preprocessing import train_loader, val_loader, test_loader, label_scaler

class StockPredictionModule(pl.LightningModule):
    def __init__(self, model, label_scaler, train_loader, val_loader, test_loader):
        super().__init__()
        self.model = model
        self.label_scaler = label_scaler
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = nn.MSELoss()
        # self.criterion = torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0)

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("val_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("test_loss", loss)
        return loss

    def on_test_epoch_end(self):
        predictions, actuals = [], []
        for seqs, labels in self.test_loader:
            seqs, labels = seqs.to(self.device), labels.to(self.device)
            output = self(seqs)
            predictions.extend(output.view(-1).detach().cpu().numpy())
            actuals.extend(labels.view(-1).detach().cpu().numpy())

        predictions_rescaled = list(self.label_scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten())
        actuals_rescaled = list(self.label_scaler.inverse_transform(np.array(actuals).reshape(-1, 1)).flatten())
        baseline_rescaled = [actuals_rescaled[0]] + actuals_rescaled[:-1]
        baseline_constant = [0.0] * len(predictions_rescaled)

        len_test_set = len(predictions)

        fig, ax = plt.subplots(figsize=(15, 7))
        ax.plot(actuals_rescaled[-100:], label='Actual log returns', color='black', linestyle='-')
        ax.plot(predictions_rescaled[-100:], label='Predicted log returns', color='green', linestyle='-')
        # ax.plot(baseline_rescaled[-100:], label='Baseline_1', color='darkblue', linestyle='-')
        # ax.plot(baseline_constant[-100:], label='Baseline_2', color='steelblue', linestyle='-')
        # ax.plot(test_dates[-100:], arima_predictions[-100:], label='Baseline', color='orange', linestyle='-') 
        ax.set_title('Log returns prediction')
        ax.set_xlabel('Date')
        ax.set_ylabel('Log returns')
        ax.legend()
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
        plt.tight_layout()
        plt.show()
        filename = "plot.png"
        fig.savefig(filename)
        wandb.log({"Relative Difference Prediction": wandb.Image(filename)})
        os.remove(filename)
        plt.close(fig)
        
        """
        # Use plotting with rebasing to visualize the predictions as prices
        rebase_period = 30
        predicted_prices = [actual_closing_prices[0]]
        for i, relative_change in enumerate(predictions_rescaled[1:], 1):
            if i % rebase_period == 0:
                predicted_prices.append(actual_closing_prices[i])
            else:
                predicted_prices.append(predicted_prices[-1] * relative_change)
        baseline_prices = [actual_closing_prices[0]] + list(actual_closing_prices[:-1])

        fig, ax = plt.subplots(figsize=(15, 7))
        ax.plot(test_dates, actual_closing_prices, label='Actual Price', color='black', linestyle='-')
        ax.plot(test_dates, predicted_prices, label='Predicted Price', color='green', linestyle='-')
        ax.plot(test_dates, baseline_prices, label='Baseline', color='blue', linestyle='-') 
        ax.set_title('Price predictions based on last price in validation set')
        ax.set_xlabel('Date')
        ax.set_ylabel('Stock Price')
        ax.legend()
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
        plt.tight_layout()
        plt.show()
        filename = "plot.png"
        fig.savefig(filename)
        wandb.log({"Stock Price Prediction": wandb.Image(filename)})
        os.remove(filename)
        plt.close(fig)
        """

        """
        # We use for the first value the actual closing price and then multiply the relative change for each following timestep
        actual_prices = [actual_closing_prices[0]]
        for i in range(1, len(actuals_rescaled)):
            actual_prices.append(actual_prices[i-1] * actuals_rescaled[i])
        prediction_prices = [actual_closing_prices[0]]
        for i in range(1, len(predictions_rescaled)):
            prediction_prices.append(prediction_prices[i-1] * predictions_rescaled[i])
        baseline_prices = [actual_prices[0]] + actual_prices[:-1]
        fig, ax = plt.subplots(figsize=(15, 7))
        ax.plot(test_dates, actual_prices, label='Actual Price', color='black', linestyle='-')
        ax.plot(test_dates, prediction_prices, label='Predicted Price', color='green', linestyle='-')
        ax.plot(test_dates, baseline_prices, label='Baseline', color='blue', linestyle='-')
        ax.set_title('Stock Price Prediction')
        ax.set_xlabel('Date')
        ax.set_ylabel('Stock Price')
        ax.legend()
        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
        plt.tight_layout()
        plt.show()
        filename = "plot.png"
        fig.savefig(filename)
        wandb.log({"Stock Price Prediction": wandb.Image(filename)})
        os.remove(filename)
        plt.close(fig)
        """
        
        net_abs_dev = torch.tensor([abs(predictions_rescaled[i] - actuals_rescaled[i]) for i in range(len(actuals_rescaled))])
        baseline_abs_dev = torch.tensor([abs(baseline_rescaled[i] - actuals_rescaled[i]) for i in range(len(actuals_rescaled))])
        diff_pos = torch.relu(baseline_abs_dev - net_abs_dev).reshape(-1).tolist()
        diff_min = (-torch.relu(net_abs_dev - baseline_abs_dev)).reshape(-1).tolist()
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.set_title('Model vs baseline performance comparison on test samples')
        ax.hlines(0, xmin=0, xmax=len(actuals_rescaled), linestyles='dashed', colors='black')
        ax.bar(list(range(len(actuals_rescaled))), diff_pos, color='g', label='Model Wins', width=1.0)
        ax.bar(list(range(len(actuals_rescaled))), diff_min, color='r', label='Baseline Wins', width=1.0)
        ax.legend()
        ax.set_xlabel('Test Sample Index')
        ax.set_ylabel('Difference in Absolute Deviation')
        plt.show()
        filename = "comparison_plot.png"
        fig.savefig(filename)
        wandb.log({"Model vs Baseline Performance Comparison": wandb.Image(filename)})
        os.remove(filename)
        plt.close(fig)

        model_actual_dev = torch.tensor([predictions_rescaled[i] - actuals_rescaled[i] for i in range(len(actuals_rescaled))])
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.set_title('Model deviations from actuals')
        ax.hlines(0, xmin=0, xmax=len(actuals_rescaled), linestyles='dashed', colors='black')
        ax.bar(list(range(len(actuals_rescaled))), model_actual_dev, color='g', label='Model Wins', width=1.0)
        ax.set_xlabel('Test Sample Index')
        ax.set_ylabel('Deviation from actuals')
        plt.show()
        filename = "comparison_plot.png"
        fig.savefig(filename)
        wandb.log({"Model deviations from actuals": wandb.Image(filename)})
        os.remove(filename)
        plt.close(fig)

        baseline_actual_dev = torch.tensor([baseline_rescaled[i] - actuals_rescaled[i] for i in range(len(actuals_rescaled))])
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.set_title('Baseline deviations from actuals')
        ax.hlines(0, xmin=0, xmax=len(actuals_rescaled), linestyles='dashed', colors='black')
        ax.bar(list(range(len(actuals_rescaled))), baseline_actual_dev, color='b', label='Baseline Wins', width=1.0)
        ax.set_xlabel('Test Sample Index')
        ax.set_ylabel('Deviation from actuals')
        plt.show()
        filename = "comparison_plot.png"
        fig.savefig(filename)
        wandb.log({"Baseline deviations from actuals": wandb.Image(filename)})
        os.remove(filename)
        plt.close(fig)

        actuals_rescaled = np.array(actuals_rescaled)
        predictions_rescaled = np.array(predictions_rescaled)
        baseline_rescaled = np.array(baseline_rescaled)

        model_mse = mean_squared_error(actuals_rescaled, predictions_rescaled)
        model_rmse = np.sqrt(model_mse)
        model_mae = mean_absolute_error(actuals_rescaled, predictions_rescaled)
        model_r2 = r2_score(actuals_rescaled, predictions_rescaled)
        model_mape = np.mean(np.abs((actuals_rescaled - predictions_rescaled) / (actuals_rescaled + 1e-8)))
        pct_change_labels = [label for label in actuals_rescaled]
        pct_change_predictions = [pred for pred in predictions_rescaled]
        hit_rate_model = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_predictions))
        
        baseline_mse = mean_squared_error(actuals_rescaled, baseline_rescaled)
        baseline_rmse = np.sqrt(baseline_mse)
        baseline_mae = mean_absolute_error(actuals_rescaled, baseline_rescaled)
        baseline_r2 = r2_score(actuals_rescaled, baseline_rescaled)
        baseline_mape = np.mean(np.abs((actuals_rescaled - baseline_rescaled) / (actuals_rescaled + 1e-8)))
        pct_change_baseline = [base for base in baseline_rescaled]
        hit_rate_baseline = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_baseline))

        model_metrics = {
            "mse": model_mse,
            "rmse": model_rmse,
            "mae": model_mae,
            "mape": model_mape,
            "r2": model_r2,
            "hit_rate": hit_rate_model,
        }
        baseline_metrics = {
            "mse": baseline_mse,
            "rmse": baseline_rmse,
            "mae": baseline_mae,
            "mape": baseline_mape,
            "r2": baseline_r2,
            "hit_rate": hit_rate_baseline
        }
        model_baseline_performance_metrics = {
            "mse": round((baseline_mse / model_mse - 1) * 100, 2),
            "rmse": round((baseline_rmse / model_rmse - 1) * 100, 2),
            "mae": round((baseline_mae / model_mae - 1) * 100, 2),
            "mape": round((baseline_mape / model_mape - 1) * 100, 2),
            "r2": round((model_r2 / baseline_r2 - 1) * 100, 2),
            "hit_rate": round((hit_rate_model / hit_rate_baseline - 1) * 100, 2),
        }

        print("Preparing to log the table...")
        metrics_table = wandb.Table(columns=["metric", "model", "baseline", "model-baseline performance comparison [%]"])
        for metric in model_metrics.keys():
            metrics_table.add_data(metric, model_metrics[metric], baseline_metrics[metric], model_baseline_performance_metrics[metric])
        wandb.log({"metrics": metrics_table})

def main():
    seed_value = 42
    torch.manual_seed(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_value)

    torch.set_float32_matmul_precision("medium")
    wandb_logger = WandbLogger(project="RNN_single_step_forecasts", log_model="all", config=wandb_config)
    
    module = StockPredictionModule(model=model, label_scaler=label_scaler, train_loader=train_loader, val_loader=val_loader, test_loader=test_loader)

    if torch.cuda.is_available(): 
        accelerator = "gpu"
        devices = 1
    elif torch.backends.mps.is_built():
        accelerator = "mps"
        devices = 1
    else:
        accelerator = None
        devices = None

    trainer = Trainer(max_epochs=num_epochs, logger=wandb_logger, accelerator=accelerator, devices=devices, enable_checkpointing=True)
    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    trainer.test(dataloaders=test_loader, ckpt_path="best")

    wandb.finish()

if __name__ == "__main__":
    main()



---
### Hyperparameter Optimization (Objective: Minimize validation loss)

In [None]:
import optuna
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
import torch
from models import LSTM, GRU, FCNN
from config import model_config, device, seq_length, architecture
from preprocessing import train_loader, val_loader, test_loader, label_scaler
import wandb

class StockPredictionModule(LightningModule):
    def __init__(self, model, label_scaler, train_loader, val_loader, test_loader):
        super().__init__()
        self.model = model
        self.label_scaler = label_scaler
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = torch.nn.MSELoss()

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        labels = labels.detach().cpu().numpy()
        y_pred = y_pred.detach().cpu().numpy()

        labels_rescaled = self.label_scaler.inverse_transform(labels.reshape(-1, 1)).flatten()
        predictions_rescaled = self.label_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

        r2 = r2_score(labels_rescaled , predictions_rescaled)
        mse = mean_squared_error(labels_rescaled , predictions_rescaled)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(labels_rescaled , predictions_rescaled)
        mape = np.mean(np.abs((labels_rescaled  - predictions_rescaled) / (predictions_rescaled + 1e-8)))
        pct_change_labels = [label - 1 for label in labels_rescaled]
        pct_change_predictions = [prediction - 1 for prediction in predictions_rescaled]
        hit_rate = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_predictions))

        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_r2", r2, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mse", mse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_rmse", rmse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mae", mae, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mape", mape, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("hit_rate", hit_rate, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {"val_loss": loss, "val_r2": r2, "val_mse": mse, "val_rmse": rmse, "val_mae": mae, "val_mape": mape, "hit_rate": hit_rate}


def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    num_layers = trial.suggest_int('num_layers', 1, 3)
    hidden_size = trial.suggest_int('hidden_size', 16, 128)
    dropout_prob = trial.suggest_float('dropout_prob', 0.0, 0.5)

    model_config.update({
        "hidden_layer_size": hidden_size,
        "num_layers": num_layers,
        "dropout_prob": dropout_prob
    })

    wandb_config = {
        "architecture": architecture,
        "learning_rate": learning_rate,
        "num_units": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout_prob,
        "seq_length": seq_length,
        "epochs": 50
    }

    wandb.init(project="optuna_hyperparameter_tuning", entity="frederik135", config=wandb_config, reinit=True)
    model = GRU(**model_config).to(device)
    module = StockPredictionModule(model=model, label_scaler=label_scaler, 
                                   train_loader=train_loader, val_loader=val_loader, test_loader=None)
    module.hparams.learning_rate = learning_rate

    accelerator = "auto"
    devices = 1 if torch.cuda.is_available() or torch.backends.mps.is_built() else None

    wandb_logger = WandbLogger(project="optuna_hyperparameter_tuning", log_model="all", config=wandb_config)
    trainer = Trainer(
        logger=wandb_logger,
        max_epochs=70,
        callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=10)],
        accelerator=accelerator,
        devices=devices,
        enable_checkpointing=False,
        enable_progress_bar=False
    )

    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    val_result = trainer.validate(module, dataloaders=val_loader, verbose=False)
    val_loss = val_result[0].get('val_loss', float('inf'))
    wandb.finish()
    return val_loss


seed_value = 42
torch.manual_seed(seed_value)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_value)
torch.set_float32_matmul_precision("medium")

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

print("Best hyperparameters: ", study.best_trial.params)

In [None]:
print("Best hyperparameters: ", study.best_trial.params)

### Hyperparameter Optimization (Objective: Maximize R2 Score)

In [None]:
import optuna
import numpy as np
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
import torch
from models import LSTM, GRU, FCNN
from config import model_config, device, seq_length, architecture
from preprocessing import train_loader, val_loader, test_loader, label_scaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import wandb

class StockPredictionModule(LightningModule):
    def __init__(self, model, label_scaler, train_loader, val_loader, test_loader):
        super().__init__()
        self.model = model
        self.label_scaler = label_scaler
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = torch.nn.MSELoss()

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        labels = labels.detach().cpu().numpy()
        y_pred = y_pred.detach().cpu().numpy()
        r2 = r2_score(labels, y_pred)
        mse = mean_squared_error(labels, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(labels, y_pred)
        mape = np.mean(np.abs((labels - y_pred) / (y_pred + 1e-8)))

        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_r2", r2, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mse", mse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_rmse", rmse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mae", mae, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mape", mape, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {"val_loss": loss, "val_r2": r2, "val_mse": mse, "val_rmse": rmse, "val_mae": mae, "val_mape": mape}

def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    num_layers = trial.suggest_int('num_layers', 1, 3)
    hidden_size = trial.suggest_int('hidden_size', 16, 128)
    dropout_prob = trial.suggest_float('dropout_prob', 0.0, 0.5)

    model_config.update({
        "hidden_layer_size": hidden_size,
        "num_layers": num_layers,
        "dropout_prob": dropout_prob
    })

    wandb_config = {
        "learning_rate": learning_rate,
        "num_units": hidden_size,
        "num_layers": num_layers,
        "dropout": dropout_prob,
        "seq_length": seq_length,
        "epochs": 50
    }

    wandb.init(project="optuna_hyperparameter_tuning", entity="frederik135", config=wandb_config, reinit=True)
    model = GRU(**model_config).to(device)
    module = StockPredictionModule(model=model, label_scaler=label_scaler, 
                                   train_loader=train_loader, val_loader=val_loader, test_loader=test_loader)
    module.hparams.learning_rate = learning_rate

    accelerator = "auto"
    devices = 1 if torch.cuda.is_available() or torch.backends.mps.is_built() else None

    wandb_logger = WandbLogger(project="optuna_hyperparameter_tuning", log_model="all")
    trainer = Trainer(
        logger=wandb_logger,
        max_epochs=70,
        callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=10)],
        accelerator=accelerator,
        devices=devices,
        enable_checkpointing=False,
        enable_progress_bar=False
    )

    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    val_result = trainer.validate(module, dataloaders=val_loader, verbose=False)
    val_loss = val_result[0].get('val_loss', float('inf'))
    val_r2 = val_result[0].get('val_r2', float('-inf')) 

    wandb.finish()

    return -np.exp(val_r2 + 1.0)

seed_value = 42
torch.manual_seed(seed_value)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_value)
torch.set_float32_matmul_precision("medium")

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

print("Best hyperparameters: ", study.best_trial.params)

In [None]:
print("Best hyperparameters: ", study.best_trial.params)

### Hyperparameter Optimization for FCNN (Objective: Minimize validation loss)

In [None]:
import optuna
import torch
import torch.nn as nn
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
from models import FCNN_model
from config import device, seq_length, num_features
from preprocessing import train_loader, val_loader, label_scaler
import wandb
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

class StockPredictionModule(LightningModule):
    def __init__(self, model, label_scaler, train_loader, val_loader, test_loader):
        super().__init__()
        self.model = model
        self.label_scaler = label_scaler
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = torch.nn.MSELoss()

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        labels = labels.detach().cpu().numpy()
        y_pred = y_pred.detach().cpu().numpy()

        labels_rescaled = self.label_scaler.inverse_transform(labels.reshape(-1, 1)).flatten()
        predictions_rescaled = self.label_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

        r2 = r2_score(labels_rescaled , predictions_rescaled)
        mse = mean_squared_error(labels_rescaled , predictions_rescaled)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(labels_rescaled , predictions_rescaled)
        mape = np.mean(np.abs((labels_rescaled  - predictions_rescaled) / (predictions_rescaled + 1e-8)))
        pct_change_labels = [label - 1 for label in labels_rescaled]
        pct_change_predictions = [prediction - 1 for prediction in predictions_rescaled]
        hit_rate = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_predictions))

        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_r2", r2, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mse", mse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_rmse", rmse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mae", mae, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mape", mape, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("hit_rate", hit_rate, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {"val_loss": loss, "val_r2": r2, "val_mse": mse, "val_rmse": rmse, "val_mae": mae, "val_mape": mape, "hit_rate": hit_rate}


def objective(trial):
    num_hidden_layers = trial.suggest_int('num_hidden_layers', 1, 3)
    hidden_layers = [trial.suggest_int(f'hidden_size_{i}', 32, 128) for i in range(num_hidden_layers)]
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    dropout_prob = trial.suggest_float('dropout_prob', 0.0, 0.5)

    model_config = {
        "seq_length": seq_length,
        "num_features": num_features,
        "hidden_layers": hidden_layers,
        "n_out": 1, 
        "dropout_prob": dropout_prob
    }

    wandb_config = {
        "architecture": "FCNN",
        "learning_rate": learning_rate,
        "hidden_layers": hidden_layers,
        "dropout_prob": dropout_prob,
        "seq_length": seq_length,
        "epochs": 50
    }

    wandb.init(project="fcnn_hyperparameter_test", entity="frederik135", config=wandb_config, reinit=True)

    model = FCNN_model(**model_config).to(device)
    module = StockPredictionModule(model=model, label_scaler=label_scaler, 
                                   train_loader=train_loader, val_loader=val_loader, test_loader=None)
    module.hparams.learning_rate = learning_rate

    if torch.cuda.is_available():
        accelerator = "gpu"
        devices = 1
    elif hasattr(torch, 'has_mps') and torch.backends.mps.is_built():
        accelerator = "mps"
        devices = 1
    else:
        accelerator = None
        devices = None

    wandb_logger = WandbLogger(project="fcnn_hyperparameter_test", log_model="all", config=wandb_config)
    trainer = Trainer(
        logger=wandb_logger,
        max_epochs=70,
        callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=10)],
        accelerator=accelerator,
        devices=devices,
        enable_checkpointing=False,
        enable_progress_bar=False
    )

    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    val_result = trainer.validate(module, dataloaders=val_loader, verbose=False)
    val_loss = val_result[0].get('val_loss', float('inf'))

    wandb.finish()
    return val_loss

seed_value = 42
torch.manual_seed(seed_value)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_value)
torch.set_float32_matmul_precision("medium")

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

print("Best hyperparameters: ", study.best_trial.params)


### Hyperparameter Optimization: Transformer

In [1]:
import torch
import optuna
import numpy as np
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import WandbLogger
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from models import TransformerModel
from config import num_features, model_config, device, seq_length, architecture, wandb_config, num_epochs
from preprocessing import train_loader, val_loader, test_loader, label_scaler
import wandb

class StockPredictionModule(LightningModule):
    def __init__(self, model, label_scaler, train_loader, val_loader, test_loader):
        super().__init__()
        self.model = model
        self.label_scaler = label_scaler
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.criterion = torch.nn.MSELoss()

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        return optimizer
    
    def training_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        seqs, labels = batch
        y_pred = self(seqs)
        loss = self.criterion(y_pred, labels)
        labels = labels.detach().cpu().numpy()
        y_pred = y_pred.detach().cpu().numpy()

        labels_rescaled = self.label_scaler.inverse_transform(labels.reshape(-1, 1)).flatten()
        predictions_rescaled = self.label_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

        r2 = r2_score(labels_rescaled , predictions_rescaled)
        mse = mean_squared_error(labels_rescaled , predictions_rescaled)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(labels_rescaled , predictions_rescaled)
        mape = np.mean(np.abs((labels_rescaled  - predictions_rescaled) / (predictions_rescaled + 1e-8)))
        pct_change_labels = [label for label in labels_rescaled]
        pct_change_predictions = [prediction for prediction in predictions_rescaled]
        hit_rate = np.mean(np.sign(pct_change_labels) == np.sign(pct_change_predictions))

        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_r2", r2, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mse", mse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_rmse", rmse, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mae", mae, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("val_mape", mape, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("hit_rate", hit_rate, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return {"val_loss": loss, "val_r2": r2, "val_mse": mse, "val_rmse": rmse, "val_mae": mae, "val_mape": mape, "hit_rate": hit_rate}


def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    num_layers = trial.suggest_int('num_layers', 1, 3)
    num_heads = trial.suggest_categorical('num_heads', [2, 4, 8])
    hidden_size_multipliers = trial.suggest_int('hidden_size_multipliers', 1, 16)
    hidden_size = num_heads * hidden_size_multipliers
    dropout_prob = trial.suggest_float('dropout_prob', 0.0, 0.5)

    if hidden_size % num_heads != 0:
        raise optuna.exceptions.TrialPruned("hidden_size is not divisible by num_heads")

    model_config.update({
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "dropout": dropout_prob
    })

    wandb_config = {
        "architecture": architecture,
        "learning_rate": learning_rate,
        "num_units": hidden_size,
        "num_layers": num_layers,
        "num_heads": num_heads,
        "dropout": dropout_prob,
        "seq_length": seq_length,
        "epochs": num_epochs
    }

    wandb.init(project="cyclical_hyperparameter_tuning_transformer", entity="frederik135", config=wandb_config, reinit=True)
    model = TransformerModel(**model_config).to(device)
    module = StockPredictionModule(model=model, label_scaler=label_scaler,
                                   train_loader=train_loader, val_loader=val_loader, test_loader=None)
    module.hparams.learning_rate = learning_rate

    if torch.cuda.is_available(): 
        accelerator = "gpu"
        devices = 1
    elif torch.backends.mps.is_built():
        accelerator = "mps"
        devices = 1
    else:
        accelerator = None
        devices = None

    wandb_logger = WandbLogger(project="cyclical_hyperparameter_tuning_transformer", log_model="all", config=wandb_config)
    trainer = Trainer(
        logger=wandb_logger,
        max_epochs=70,
        callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=10)],
        accelerator=accelerator,
        devices=devices,
        enable_checkpointing=False,
        enable_progress_bar=False
    )

    trainer.fit(module, train_dataloaders=train_loader, val_dataloaders=val_loader)
    val_result = trainer.validate(module, dataloaders=val_loader, verbose=False)
    val_loss = val_result[0].get('val_loss', float('inf'))
    wandb.finish()
    return val_loss


seed_value = 42
torch.manual_seed(seed_value)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_value)
torch.set_float32_matmul_precision("medium")

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

print("Best hyperparameters: ", study.best_trial.params)

[*********************100%%**********************]  1 of 1 completed
[I 2024-06-02 14:00:01,222] A new study created in memory with name: no-name-7be9d591-fe5a-46f0-b75b-f6ea1055510f
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfrederik135[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 790 K 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
790 K     Trainable params
0         Non-trainable params
790 K     Total params
3.160     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▅▄▄▆▃▂▂▃▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▅▆▆▅▃▃▃▂▂▃▂▂▂▂▂▁▁▂▂▁▂▁▁▁▁▁▁▁▃▁▁▁▂▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,█▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mape,█▅▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mse,█▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_r2,▁▅▆▆▇▇██████████████████████████████████

0,1
epoch,47.0
hit_rate,1.0
train_loss_epoch,0.00012
train_loss_step,0.00266
trainer/global_step,6533.0
val_loss,0.00172
val_mae,0.00657
val_mape,0.00655
val_mse,8e-05
val_r2,-0.04663


[I 2024-06-02 14:01:47,580] Trial 0 finished with value: 0.0017164145829156041 and parameters: {'learning_rate': 1.0011081325003182e-05, 'num_layers': 2, 'num_heads': 8, 'hidden_size_multipliers': 11, 'dropout_prob': 0.24393125530713555}. Best is trial 0 with value: 0.0017164145829156041.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011154017588890182, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 30.9 K
1 | criterion | MSELoss          | 0     
-----------------------------------------------
30.9 K    Trainable params
0         Non-trainable params
30.9 K    Total params
0.123     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.008 MB uploaded\r'), FloatProgress(value=0.1498723714598274, max=1.0…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mape,█▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mse,█▁▁▁▁▁▁▁▁▁▁▁▁▁
val_r2,▁█████████████

0,1
epoch,13.0
hit_rate,1.0
train_loss_epoch,0.00012
train_loss_step,0.00215
trainer/global_step,1807.0
val_loss,0.00166
val_mae,0.00642
val_mape,0.00641
val_mse,8e-05
val_r2,-0.00904


[I 2024-06-02 14:02:43,509] Trial 1 finished with value: 0.0016582789830863476 and parameters: {'learning_rate': 0.003018704416886663, 'num_layers': 3, 'num_heads': 2, 'hidden_size_multipliers': 1, 'dropout_prob': 0.12764197639858893}. Best is trial 1 with value: 0.0016582789830863476.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011126127777778392, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 55.7 K
1 | criterion | MSELoss          | 0     
-----------------------------------------------
55.7 K    Trainable params
0         Non-trainable params
55.7 K    Total params
0.223     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▄▃▃▃▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,▄▃▂▇▄▂▅▂▃▂▆▂▂▅▄▂▂▁▆▃▁▅▃▃▁▂▁▂▃█▁▁▂▇▃▁▁▁▂▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,▆▄▆██▇▃▅▅▃▃▂▃▂▃▂▂▂▂▂▂▂▁▁▂▁▂▂▃▂▁▁▁▁▂▁▁▂▁▁
val_mae,▆▄▆██▇▃▅▅▂▂▂▃▂▃▂▂▂▂▂▂▂▁▁▂▁▂▂▃▂▁▁▁▁▂▁▁▁▁▁
val_mape,▆▄▆██▇▃▅▅▂▂▂▃▂▃▂▂▂▂▂▁▂▁▁▂▁▁▂▃▂▁▁▁▁▂▁▁▁▁▁
val_mse,▆▄▆██▇▃▅▅▃▃▂▃▂▃▂▂▂▂▂▂▂▁▁▂▁▂▂▃▂▁▁▁▁▂▁▁▂▁▁
val_r2,▃▅▃▁▁▂▆▄▄▇▆▇▆▇▆▇▇▇▇▇█▇██▇██▇▆▇████▇█████

0,1
epoch,47.0
hit_rate,1.0
train_loss_epoch,0.00012
train_loss_step,0.00145
trainer/global_step,6533.0
val_loss,0.0017
val_mae,0.00653
val_mape,0.00651
val_mse,8e-05
val_r2,-0.0413


[I 2024-06-02 14:04:59,966] Trial 2 finished with value: 0.0017043797997757792 and parameters: {'learning_rate': 0.003057018011582537, 'num_layers': 3, 'num_heads': 2, 'hidden_size_multipliers': 2, 'dropout_prob': 0.27092519524035186}. Best is trial 1 with value: 0.0016582789830863476.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011158434266667428, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 526 K 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
526 K     Trainable params
0         Non-trainable params
526 K     Total params
2.106     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,▇█▃▂▂▃▂▂▂▂▂▁▂▂▂▂▁▁▁▂▁▂▂▂▂▂▁▂▁▂▁▂
train_loss_step,▃▂▂▂▂▂▁▁▁▂▂▁▁▁▂▂▁▁▁▁▂▁▁▁▁▁█▁▂▂▂▂▂▂▁▁▁▁▄▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
val_loss,█▆▂▂▃▁▁▁▁▁▃▂▂▂▁▁▃▁▁▁▁▁▃▁▂▁▂▂▃█▂▃▃
val_mae,█▆▃▃▃▂▂▂▁▂▃▂▂▂▂▁▃▁▁▁▁▁▃▁▂▁▂▂▂▆▂▂▂
val_mape,█▅▃▃▃▂▂▂▁▂▂▂▂▂▂▁▃▁▁▁▁▁▃▁▂▁▂▂▂▆▁▂▂
val_mse,█▆▂▂▃▁▁▁▁▁▃▂▂▂▁▁▃▁▁▁▁▁▃▁▂▁▂▂▃█▂▃▃
val_r2,▁▄▇▇▆█████▆▇▇▇██▆█████▆█▇█▇▇▇▃▇▇▇

0,1
epoch,32.0
hit_rate,1.0
train_loss_epoch,0.00012
train_loss_step,0.00433
trainer/global_step,4448.0
val_loss,0.00168
val_mae,0.00646
val_mape,0.00645
val_mse,8e-05
val_r2,-0.02366


[I 2024-06-02 14:06:16,240] Trial 3 finished with value: 0.001683520502410829 and parameters: {'learning_rate': 0.0006377580927881986, 'num_layers': 2, 'num_heads': 4, 'hidden_size_multipliers': 15, 'dropout_prob': 0.30083757076604656}. Best is trial 1 with value: 0.0016582789830863476.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011133692599999752, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 1.0 M 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 M     Total params
4.104     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▆▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_loss,▂█▂▁▂▁▁▁▂▂▁▁▁▁▁▁▁
val_mae,▃█▃▁▃▁▁▁▂▃▁▁▂▁▁▂▂
val_mape,▃█▃▁▃▁▁▁▂▃▁▁▂▁▁▂▂
val_mse,▂█▂▁▂▁▁▁▂▂▁▁▁▁▁▁▁
val_r2,▇▁▇█▇███▇▇███████

0,1
epoch,16.0
hit_rate,1.0
train_loss_epoch,0.00013
train_loss_step,0.00907
trainer/global_step,2224.0
val_loss,0.00246
val_mae,0.00865
val_mape,0.00871
val_mse,0.00012
val_r2,-0.68959


[I 2024-06-02 14:07:03,938] Trial 4 finished with value: 0.0024638778995722532 and parameters: {'learning_rate': 0.06461074121620844, 'num_layers': 2, 'num_heads': 8, 'hidden_size_multipliers': 14, 'dropout_prob': 0.11240434630188645}. Best is trial 1 with value: 0.0016582789830863476.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011162918977776017, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 1.2 M 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.738     Total estimated model params size (MB)


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▆▄▂▃▂▃▂▄▂▂▂▂▁▂
train_loss_step,▄▃▆▃▄▃▄▂▄█▁▅▂▃▂▁▃▃▃▇▄▁▄▃▂▃▁▃▂▂▂▆▁██▂▆▂▄▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
val_loss,▅▆▇▃▁▃▃▅██▇▆▃▃▄▄
val_mae,▅▆▇▃▁▃▃▅██▇▆▃▃▄▄
val_mape,▅▅▇▃▁▃▃▅██▇▆▃▃▄▄
val_mse,▅▆▇▃▁▃▃▅██▇▆▃▃▄▄
val_r2,▄▃▂▆█▆▆▄▁▁▂▃▆▆▅▅

0,1
epoch,15.0
hit_rate,1.0
train_loss_epoch,0.00012
train_loss_step,0.00101
trainer/global_step,2085.0
val_loss,0.00178
val_mae,0.00671
val_mape,0.00669
val_mse,9e-05
val_r2,-0.10134


[I 2024-06-02 14:07:59,713] Trial 5 finished with value: 0.001782930106855929 and parameters: {'learning_rate': 0.0070325091803032334, 'num_layers': 3, 'num_heads': 8, 'hidden_size_multipliers': 11, 'dropout_prob': 0.12356704492219633}. Best is trial 1 with value: 0.0016582789830863476.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011116819900000035, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 26.9 K
1 | criterion | MSELoss          | 0     
-----------------------------------------------
26.9 K    Trainable params
0         Non-trainable params
26.9 K    Total params
0.108     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=70` reached.


VBox(children=(Label(value='0.001 MB of 0.008 MB uploaded\r'), FloatProgress(value=0.14902102973168962, max=1.…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▄▃▃▂▂▃▂▁▂▂▂▁▃▂▂▂▂▂▂▃▁▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂
train_loss_step,█▄▂▂▂▂▁▂▂▂▁▁▂▁▂▁▂▁▁▁▁▁▂▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mape,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mse,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_r2,▁▆██████████████████████████████████████

0,1
epoch,70.0
hit_rate,1.0
train_loss_epoch,0.00017
train_loss_step,0.0094
trainer/global_step,9730.0
val_loss,0.00178
val_mae,0.00671
val_mape,0.00669
val_mse,9e-05
val_r2,-0.10341


[I 2024-06-02 14:09:37,914] Trial 6 finished with value: 0.0017820895882323384 and parameters: {'learning_rate': 1.2491318781595989e-05, 'num_layers': 1, 'num_heads': 2, 'hidden_size_multipliers': 3, 'dropout_prob': 0.24541517213391278}. Best is trial 1 with value: 0.0016582789830863476.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011159983333333104, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 625 K 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
625 K     Trainable params
0         Non-trainable params
625 K     Total params
2.503     Total estimated model params size (MB)
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
hit_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_epoch,█▂▁▂▃▃▂▄▄▃▂▃▃▄▃▃▁▂▂▁▂▃▂▃▂▂▂▂▂▂
train_loss_step,▄▃▂▂▂▁▁▁▂▃▁▂▂▂▁▃▃▃▂▁▂▂▁▂▁▁▁▂▄▃▂▁█▃▁▁▂▁▁▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,▇▆▆▅▆▇▇██▅▅▅▅▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,▇▆▆▅▆▇▇██▅▅▅▅▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mape,▇▆▆▅▆▇▇██▅▅▅▅▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mse,▇▆▆▅▆▇▇██▅▅▅▅▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_r2,▂▃▃▄▃▂▂▁▁▄▄▄▄▅▆▆▇██████████████

0,1
epoch,30.0
hit_rate,1.0
train_loss_epoch,0.00012
train_loss_step,0.00365
trainer/global_step,4212.0
val_loss,0.00169
val_mae,0.00649
val_mape,0.00648
val_mse,8e-05
val_r2,-0.02919


[I 2024-06-02 14:11:08,057] Trial 7 finished with value: 0.0016880895709618926 and parameters: {'learning_rate': 0.014157065500477062, 'num_layers': 3, 'num_heads': 8, 'hidden_size_multipliers': 6, 'dropout_prob': 0.2740561789751887}. Best is trial 1 with value: 0.0016582789830863476.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011164535644444288, max=1.0…

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/masterarbeit/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:391: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | TransformerModel | 103 K 
1 | criterion | MSELoss          | 0     
-----------------------------------------------
103 K     Trainable params
0         Non-trainable params
103 K     Total params
0.413     Total estimated model params size (MB)
