In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from torch.utils.data import DataLoader, TensorDataset
from utils.metrics import WeightedRMSE, weighted_RMSE, LinExLoss, LinLinLoss
import numpy as np
from utils.metrics import predicted_revenue
import matplotlib.pyplot as plt
from utils.postprocessing import ProcessedResult
from data.data_loader import Dataset_XGB

In [9]:
from datetime import datetime

# Run timestamp (day-month-year_hour-minute-second); it is embedded in the
# study name and in the saved figure file names further down.
now = f"{datetime.now():%d-%m-%Y_%H-%M-%S}"

In [5]:
# # Fix seed for reproducibility
# SEED = 42
# torch.manual_seed(SEED)
# np.random.seed(SEED)

# # Sample dataset: inputs of 1D tensor length n and outputs of 1D tensor length 1
# train = Dataset_XGB(root_path='./data/processed/SRL/', data='SRL_NEG_04_08', flag='train')  # TODO: pass input_len=<window length>; the dangling "input_len=" here was the SyntaxError
# val = Dataset_XGB(root_path='./data/processed/SRL/', data='SRL_NEG_04_08', flag='val')
# X_train = torch.Tensor(np.hstack(train.seqs_x).transpose(1, 0))
# y_train = torch.Tensor(np.hstack(train.seqs_y).transpose(1, 0))
# X_val = torch.Tensor(np.hstack(val.seqs_x).transpose(1, 0))
# y_val = torch.Tensor(np.hstack(val.seqs_y).transpose(1, 0))
# train_dataset = TensorDataset(X_train, y_train)
# val_dataset = TensorDataset(X_val, y_val)
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

SyntaxError: invalid syntax (816573329.py, line 7)

In [13]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# LSTM model definition
class LSTMModel(nn.Module):
    """LSTM followed by a linear head that emits one value per sequence.

    Args:
        input_dim: Feature size of each time step fed to ``nn.LSTM``. Because
            ``forward`` appends a trailing axis of size 1 to its input, this
            is expected to be 1.
        hidden_dim: Hidden size of the LSTM and input size of the linear head.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Predict one value per sequence.

        Args:
            x: Tensor of shape (batch_size, sequence_length).

        Returns:
            Tensor of shape (batch_size, 1) computed from the LSTM output at
            the last time step.
        """
        # Add the feature axis expected by the batch-first LSTM:
        # (batch_size, sequence_length) -> (batch_size, sequence_length, 1).
        x = x.unsqueeze(-1)

        # Zero initial hidden/cell states. They are allocated from ``x`` so
        # they share its device and dtype; plain ``torch.zeros`` would always
        # create CPU float32 tensors and break once the model is moved to a GPU.
        state_shape = (self.lstm.num_layers, x.size(0), self.lstm.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        out, _ = self.lstm(x, (h0, c0))

        # Only the last time step feeds the regression head.
        return self.fc(out[:, -1, :])

class Args:
    """Empty attribute container used as a lightweight settings namespace."""


# Fixed run settings; the tuning objective adds the sampled hyperparameters
# to this same object.
args = Args()
vars(args).update(
    scale='standard',
    loss='rmse',
    data='SRL_NEG_00_04',
)

# Objective function for Optuna
def objective(trial):
    # Define hyperparameters to be optimized
    
    args.learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    args.hidden_dim  = trial.suggest_int("hidden_dim", 16, 128, step=16)
    args.num_layers = trial.suggest_int("num_layers", 1, 3)
    args.input_len = trial.suggest_int('input_len', 56, 112, step=7)
    
    match args.loss:
        case 'linex':
            args.linex_weight = trial.suggest_float('linex_weight', 0.01, 3, step=0.01)
        case 'wrmse':
            args.wrmse_weight = trial.suggest_float('wrmse_weight', 1.0, 10.0, step=0.1)
        case 'linlin':
            args.linlin_weight = trial.suggest_float('linlin_weight', 0.05, 0.45, step=0.005)
    
    model = LSTMModel(input_dim=1, hidden_dim=args.hidden_dim, num_layers=args.num_layers).to(device)
    # Sample dataset: inputs of 1D tensor length n and outputs of 1D tensor length 1
    train = Dataset_XGB(root_path='./data/processed/SRL/', data=args.data, flag='train', input_len=args.input_len)
    val = Dataset_XGB(root_path='./data/processed/SRL/', data=args.data, flag='val', input_len=args.input_len)
    X_train = torch.Tensor(np.hstack(train.seqs_x).transpose(1, 0))
    y_train = torch.Tensor(np.hstack(train.seqs_y).transpose(1, 0))
    X_val = torch.Tensor(np.hstack(val.seqs_x).transpose(1, 0))
    y_val = torch.Tensor(np.hstack(val.seqs_y).transpose(1, 0))
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, pin_memory=True)    
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    
    # criterion = nn.MSELoss()
    
    match args.loss:
        case 'linex':
            criterion=LinExLoss(args.linex_weight)
        case 'wrmse':
            criterion=WeightedRMSE(args.wrmse_weight)
        case 'rmse':
            criterion=nn.MSELoss()
        case 'linlin':
            criterion=LinLinLoss(args.linlin_weight)
    
    for epoch in range(10):  # Training for 10 epochs as an example
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            
    # Validation loss
    model.eval()
    val_loss = 0.0
    rev = 0.0
    preds = torch.Tensor()
    trues = torch.Tensor()
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            pred = model(batch_x)
            preds = torch.concat((preds, pred))
            trues = torch.concat((trues, batch_y))
            
            rev = predicted_revenue(pred.cpu().detach().numpy(), batch_y.cpu().detach().numpy())
            loss = criterion(pred, batch_y)
            val_loss += loss.item()
            rev += rev.item()
    
    trues = trues.numpy()
    preds = preds.numpy()
    
    result = ProcessedResult(preds, trues, args, val)
    fig = result.plot_pred_vs_true(result.pred)

    fig.savefig(f'./lstm/informer_result_{args.data}_{args.loss}_{now}_trial_{trial._trial_id}.png', bbox_inches='tight')
    
    return (val_loss / len(val_loader))

import logging
import os
import sys

# Add stream handler of stdout to show the messages.
# Re-running this cell must not attach another handler, otherwise every Optuna
# message is printed once per previous execution of the cell.
optuna_logger = optuna.logging.get_logger("optuna")
stdout_handler_attached = any(
    isinstance(handler, logging.StreamHandler) and handler.stream is sys.stdout
    for handler in optuna_logger.handlers
)
if not stdout_handler_attached:
    optuna_logger.addHandler(logging.StreamHandler(sys.stdout))

# The SQLite database (and the per-trial figures) live under ./lstm; create the
# directory up front because SQLite cannot create missing parent directories.
os.makedirs("lstm", exist_ok=True)

study_name = f"tune_lstm_{args.data}_{args.loss}_{now}"  # Unique identifier of the study.
storage_name = "sqlite:///lstm/{}.db".format(study_name)
# load_if_exists=True means re-running this cell with the same ``now`` resumes
# the stored study and adds n_trials more trials to it.
study = optuna.create_study(study_name=study_name, storage=storage_name,
                            load_if_exists=True,
                            directions=['minimize'],
                            sampler=optuna.samplers.TPESampler(seed=42),
                            )

study.optimize(objective, n_trials=10)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("Value: ", trial.value)
print("Params: ")
for key, value in trial.params.items():
    print(f"{key}: {value}")


[32m[I 2023-08-08 21:57:05,295][0m Using an existing study with name 'tune_lstm_SRL_NEG_00_04_rmse_08-08-2023_21-52-50' instead of creating a new one.[0m


Using an existing study with name 'tune_lstm_SRL_NEG_00_04_rmse_08-08-2023_21-52-50' instead of creating a new one.
Using an existing study with name 'tune_lstm_SRL_NEG_00_04_rmse_08-08-2023_21-52-50' instead of creating a new one.
Using an existing study with name 'tune_lstm_SRL_NEG_00_04_rmse_08-08-2023_21-52-50' instead of creating a new one.
Using an existing study with name 'tune_lstm_SRL_NEG_00_04_rmse_08-08-2023_21-52-50' instead of creating a new one.


[32m[I 2023-08-08 21:57:46,835][0m Trial 2 finished with value: 0.020094833336770535 and parameters: {'learning_rate': 0.0001329291894316216, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 91}. Best is trial 2 with value: 0.020094833336770535.[0m


Trial 2 finished with value: 0.020094833336770535 and parameters: {'learning_rate': 0.0001329291894316216, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 91}. Best is trial 2 with value: 0.020094833336770535.
Trial 2 finished with value: 0.020094833336770535 and parameters: {'learning_rate': 0.0001329291894316216, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 91}. Best is trial 2 with value: 0.020094833336770535.
Trial 2 finished with value: 0.020094833336770535 and parameters: {'learning_rate': 0.0001329291894316216, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 91}. Best is trial 2 with value: 0.020094833336770535.
Trial 2 finished with value: 0.020094833336770535 and parameters: {'learning_rate': 0.0001329291894316216, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 91}. Best is trial 2 with value: 0.020094833336770535.


[32m[I 2023-08-08 21:57:54,626][0m Trial 3 finished with value: 0.049720652401447296 and parameters: {'learning_rate': 2.9380279387035334e-05, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 105}. Best is trial 2 with value: 0.020094833336770535.[0m


Trial 3 finished with value: 0.049720652401447296 and parameters: {'learning_rate': 2.9380279387035334e-05, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 105}. Best is trial 2 with value: 0.020094833336770535.
Trial 3 finished with value: 0.049720652401447296 and parameters: {'learning_rate': 2.9380279387035334e-05, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 105}. Best is trial 2 with value: 0.020094833336770535.
Trial 3 finished with value: 0.049720652401447296 and parameters: {'learning_rate': 2.9380279387035334e-05, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 105}. Best is trial 2 with value: 0.020094833336770535.
Trial 3 finished with value: 0.049720652401447296 and parameters: {'learning_rate': 2.9380279387035334e-05, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 105}. Best is trial 2 with value: 0.020094833336770535.


[32m[I 2023-08-08 21:58:08,459][0m Trial 4 finished with value: 0.009087164731075367 and parameters: {'learning_rate': 0.0006358358856676254, 'hidden_dim': 96, 'num_layers': 1, 'input_len': 112}. Best is trial 4 with value: 0.009087164731075367.[0m


Trial 4 finished with value: 0.009087164731075367 and parameters: {'learning_rate': 0.0006358358856676254, 'hidden_dim': 96, 'num_layers': 1, 'input_len': 112}. Best is trial 4 with value: 0.009087164731075367.
Trial 4 finished with value: 0.009087164731075367 and parameters: {'learning_rate': 0.0006358358856676254, 'hidden_dim': 96, 'num_layers': 1, 'input_len': 112}. Best is trial 4 with value: 0.009087164731075367.
Trial 4 finished with value: 0.009087164731075367 and parameters: {'learning_rate': 0.0006358358856676254, 'hidden_dim': 96, 'num_layers': 1, 'input_len': 112}. Best is trial 4 with value: 0.009087164731075367.
Trial 4 finished with value: 0.009087164731075367 and parameters: {'learning_rate': 0.0006358358856676254, 'hidden_dim': 96, 'num_layers': 1, 'input_len': 112}. Best is trial 4 with value: 0.009087164731075367.


[32m[I 2023-08-08 21:58:14,364][0m Trial 5 finished with value: 0.006634589129438003 and parameters: {'learning_rate': 0.00314288089084011, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.[0m


Trial 5 finished with value: 0.006634589129438003 and parameters: {'learning_rate': 0.00314288089084011, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.
Trial 5 finished with value: 0.006634589129438003 and parameters: {'learning_rate': 0.00314288089084011, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.
Trial 5 finished with value: 0.006634589129438003 and parameters: {'learning_rate': 0.00314288089084011, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.
Trial 5 finished with value: 0.006634589129438003 and parameters: {'learning_rate': 0.00314288089084011, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.


[32m[I 2023-08-08 21:58:30,527][0m Trial 6 finished with value: 0.029680818940202396 and parameters: {'learning_rate': 8.17949947521167e-05, 'hidden_dim': 80, 'num_layers': 2, 'input_len': 70}. Best is trial 5 with value: 0.006634589129438003.[0m


Trial 6 finished with value: 0.029680818940202396 and parameters: {'learning_rate': 8.17949947521167e-05, 'hidden_dim': 80, 'num_layers': 2, 'input_len': 70}. Best is trial 5 with value: 0.006634589129438003.
Trial 6 finished with value: 0.029680818940202396 and parameters: {'learning_rate': 8.17949947521167e-05, 'hidden_dim': 80, 'num_layers': 2, 'input_len': 70}. Best is trial 5 with value: 0.006634589129438003.
Trial 6 finished with value: 0.029680818940202396 and parameters: {'learning_rate': 8.17949947521167e-05, 'hidden_dim': 80, 'num_layers': 2, 'input_len': 70}. Best is trial 5 with value: 0.006634589129438003.
Trial 6 finished with value: 0.029680818940202396 and parameters: {'learning_rate': 8.17949947521167e-05, 'hidden_dim': 80, 'num_layers': 2, 'input_len': 70}. Best is trial 5 with value: 0.006634589129438003.


[32m[I 2023-08-08 21:58:37,203][0m Trial 7 finished with value: 0.031816901018222175 and parameters: {'learning_rate': 0.000684792009557478, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.[0m


Trial 7 finished with value: 0.031816901018222175 and parameters: {'learning_rate': 0.000684792009557478, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.
Trial 7 finished with value: 0.031816901018222175 and parameters: {'learning_rate': 0.000684792009557478, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.
Trial 7 finished with value: 0.031816901018222175 and parameters: {'learning_rate': 0.000684792009557478, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.
Trial 7 finished with value: 0.031816901018222175 and parameters: {'learning_rate': 0.000684792009557478, 'hidden_dim': 32, 'num_layers': 1, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.


[32m[I 2023-08-08 21:58:49,459][0m Trial 8 finished with value: 0.02296583727002144 and parameters: {'learning_rate': 0.00023345864076016249, 'hidden_dim': 112, 'num_layers': 1, 'input_len': 84}. Best is trial 5 with value: 0.006634589129438003.[0m


Trial 8 finished with value: 0.02296583727002144 and parameters: {'learning_rate': 0.00023345864076016249, 'hidden_dim': 112, 'num_layers': 1, 'input_len': 84}. Best is trial 5 with value: 0.006634589129438003.
Trial 8 finished with value: 0.02296583727002144 and parameters: {'learning_rate': 0.00023345864076016249, 'hidden_dim': 112, 'num_layers': 1, 'input_len': 84}. Best is trial 5 with value: 0.006634589129438003.
Trial 8 finished with value: 0.02296583727002144 and parameters: {'learning_rate': 0.00023345864076016249, 'hidden_dim': 112, 'num_layers': 1, 'input_len': 84}. Best is trial 5 with value: 0.006634589129438003.
Trial 8 finished with value: 0.02296583727002144 and parameters: {'learning_rate': 0.00023345864076016249, 'hidden_dim': 112, 'num_layers': 1, 'input_len': 84}. Best is trial 5 with value: 0.006634589129438003.


[32m[I 2023-08-08 21:58:56,539][0m Trial 9 finished with value: 0.019727333759268124 and parameters: {'learning_rate': 0.0005987474910461401, 'hidden_dim': 16, 'num_layers': 2, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.[0m


Trial 9 finished with value: 0.019727333759268124 and parameters: {'learning_rate': 0.0005987474910461401, 'hidden_dim': 16, 'num_layers': 2, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.
Trial 9 finished with value: 0.019727333759268124 and parameters: {'learning_rate': 0.0005987474910461401, 'hidden_dim': 16, 'num_layers': 2, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.
Trial 9 finished with value: 0.019727333759268124 and parameters: {'learning_rate': 0.0005987474910461401, 'hidden_dim': 16, 'num_layers': 2, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.
Trial 9 finished with value: 0.019727333759268124 and parameters: {'learning_rate': 0.0005987474910461401, 'hidden_dim': 16, 'num_layers': 2, 'input_len': 63}. Best is trial 5 with value: 0.006634589129438003.


[32m[I 2023-08-08 22:00:10,543][0m Trial 10 finished with value: 0.03191676797966162 and parameters: {'learning_rate': 1.5673095467235405e-05, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 105}. Best is trial 5 with value: 0.006634589129438003.[0m


Trial 10 finished with value: 0.03191676797966162 and parameters: {'learning_rate': 1.5673095467235405e-05, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 105}. Best is trial 5 with value: 0.006634589129438003.
Trial 10 finished with value: 0.03191676797966162 and parameters: {'learning_rate': 1.5673095467235405e-05, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 105}. Best is trial 5 with value: 0.006634589129438003.
Trial 10 finished with value: 0.03191676797966162 and parameters: {'learning_rate': 1.5673095467235405e-05, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 105}. Best is trial 5 with value: 0.006634589129438003.
Trial 10 finished with value: 0.03191676797966162 and parameters: {'learning_rate': 1.5673095467235405e-05, 'hidden_dim': 128, 'num_layers': 3, 'input_len': 105}. Best is trial 5 with value: 0.006634589129438003.


[32m[I 2023-08-08 22:00:20,886][0m Trial 11 finished with value: 0.07086291536688805 and parameters: {'learning_rate': 8.200518402245828e-05, 'hidden_dim': 16, 'num_layers': 3, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.[0m


Trial 11 finished with value: 0.07086291536688805 and parameters: {'learning_rate': 8.200518402245828e-05, 'hidden_dim': 16, 'num_layers': 3, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.
Trial 11 finished with value: 0.07086291536688805 and parameters: {'learning_rate': 8.200518402245828e-05, 'hidden_dim': 16, 'num_layers': 3, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.
Trial 11 finished with value: 0.07086291536688805 and parameters: {'learning_rate': 8.200518402245828e-05, 'hidden_dim': 16, 'num_layers': 3, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.
Trial 11 finished with value: 0.07086291536688805 and parameters: {'learning_rate': 8.200518402245828e-05, 'hidden_dim': 16, 'num_layers': 3, 'input_len': 77}. Best is trial 5 with value: 0.006634589129438003.
Number of finished trials:  12
Best trial:
Value:  0.006634589129438003
Params: 
hidden_dim: 32
input_len: 63
learning_rate: 0.00314288089084011
num_layers: 1


In [57]:
val.seqs_y

[array([[-0.27986925]]),
 array([[-0.2511806]]),
 array([[-0.26230074]]),
 array([[-0.27236809]]),
 array([[-0.27947445]]),
 array([[-0.16471986]]),
 array([[-0.26026096]]),
 array([[-0.26723572]]),
 array([[-0.25914236]]),
 array([[-0.26815691]]),
 array([[-0.27881646]]),
 array([[-0.27875066]]),
 array([[-0.28526482]]),
 array([[-0.25552338]]),
 array([[-0.26039255]]),
 array([[-0.26407734]]),
 array([[-0.27184169]]),
 array([[-0.27809266]]),
 array([[-0.28204064]]),
 array([[-0.28743621]]),
 array([[-0.26920971]]),
 array([[-0.23045371]]),
 array([[-0.1951193]]),
 array([[-0.24723262]]),
 array([[-0.25526018]]),
 array([[-0.25887916]]),
 array([[-0.27223649]]),
 array([[-0.2148592]]),
 array([[-0.18320956]]),
 array([[-0.12576647]]),
 array([[0.00056887]]),
 array([[-0.06950776]]),
 array([[-0.17287902]]),
 array([[-0.21512239]]),
 array([[-0.23190131]]),
 array([[-0.23459909]]),
 array([[-0.2326251]]),
 array([[-0.14655916]]),
 array([[-0.20735804]]),
 array([[-0.22492654]]),
 arra

In [47]:
val.seqs_x_date

[595   2022-06-20
 596   2022-06-21
 597   2022-06-22
 598   2022-06-23
 599   2022-06-24
 600   2022-06-25
 601   2022-06-26
 602   2022-06-27
 603   2022-06-28
 604   2022-06-29
 605   2022-06-30
 606   2022-07-01
 607   2022-07-02
 608   2022-07-03
 609   2022-07-04
 610   2022-07-05
 611   2022-07-06
 612   2022-07-07
 613   2022-07-08
 614   2022-07-09
 615   2022-07-10
 616   2022-07-11
 617   2022-07-12
 618   2022-07-13
 619   2022-07-14
 620   2022-07-15
 621   2022-07-16
 622   2022-07-17
 623   2022-07-18
 624   2022-07-19
 625   2022-07-20
 626   2022-07-21
 627   2022-07-22
 628   2022-07-23
 629   2022-07-24
 630   2022-07-25
 631   2022-07-26
 632   2022-07-27
 633   2022-07-28
 634   2022-07-29
 635   2022-07-30
 636   2022-07-31
 637   2022-08-01
 638   2022-08-02
 639   2022-08-03
 640   2022-08-04
 641   2022-08-05
 642   2022-08-06
 Name: date, dtype: datetime64[ns],
 596   2022-06-21
 597   2022-06-22
 598   2022-06-23
 599   2022-06-24
 600   2022-06-25
 601   202