In [14]:
import pandas as pd
import yfinance as yf
import lightning.pytorch as pl
from custom_dataset import MultivariateSeriesDataModule
from torchinfo import summary

In [15]:
# Daily S&P 500 index history from 2001 onwards. Only `start` is passed:
# yfinance documents `period` and `start`/`end` as alternatives, so combining
# `period="max"` with a start date was contradictory.
# The adjusted-close column is dropped without mutating in place;
# errors="ignore" keeps the cell working when the download does not contain an
# "Adj Close" column (presumably the case when prices are auto-adjusted —
# confirm against the installed yfinance version).
sp500 = yf.download("^GSPC", start="2001-01-01").drop(
    columns=["Adj Close"], errors="ignore"
)
sp500.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2001-01-02,1320.280029,1320.280029,1276.050049,1283.27002,1129400000
2001-01-03,1283.27002,1347.76001,1274.619995,1347.560059,1880700000
2001-01-04,1347.560059,1350.23999,1329.140015,1333.339966,2131000000
2001-01-05,1333.339966,1334.77002,1294.949951,1298.349976,1430800000
2001-01-08,1298.349976,1298.349976,1276.290039,1295.859985,1115500000


In [16]:
from lightning.pytorch.loggers import TensorBoardLogger

# Windowing configuration handed to the data module below.
N_LAGS = 7
HORIZON = 1

# Width of the model input: column count of the frame minus one.
# NOTE(review): which column is excluded is decided inside
# MultivariateSeriesDataModule — confirm this matches its encoder features.
n_vars = len(sp500.columns) - 1

# TensorBoard run directory: lightning_logs/lstm-multivariate-baseline
logger = TensorBoardLogger("lightning_logs", name="lstm-multivariate-baseline")

In [17]:
from typing import Any
import torch
from torch import nn
from torch.nn import functional as F

In [18]:
# Wrap the price frame in the project's data module; "Close" is the series
# being forecast.
datamodule = MultivariateSeriesDataModule(
    # what to model
    data=sp500,
    target_name="Close",
    # windowing
    n_lags=N_LAGS,
    horizon=HORIZON,
    # splitting / batching
    test_size=0.2,
    batch_size=32,
)

In [19]:
class MultivariateLSTM(pl.LightningModule):
    """Stacked LSTM followed by two linear layers, trained with MSE loss.

    Args:
        input_dim: Number of features per time step fed to the LSTM.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_dim: Size of the final linear layer's output.
        learning_rate: Step size for the Adam optimizer (defaults to the
            previously hard-coded 0.001).
    """

    def __init__(
        self, input_dim, hidden_dim, num_layers, output_dim, learning_rate=0.001
    ):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.learning_rate = learning_rate
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc1 = nn.Linear(in_features=hidden_dim, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=output_dim)

    def forward(self, x):
        """Run the batch-first input sequence through the LSTM and the linear head.

        Returns the head's output computed from the last time step only.
        """
        # No explicit (h0, c0): nn.LSTM defaults both to zeros when they are
        # not provided, so this matches the previous hand-built zero state
        # without allocating default-dtype tensors on the CPU and copying
        # them to the device, which also failed when the input was not
        # float32.
        out, _ = self.lstm(x)
        last_step = out[:, -1, :]
        return self.fc2(self.fc1(last_step))

    def _shared_step(self, batch, stage):
        """Compute and log the MSE loss for one batch as ``{stage}_loss``."""
        x, y = batch
        encoder_input = x["encoder_cont"]
        y_pred = self(encoder_input)
        # y is indexed at 0 — presumably (target, weight) as produced by the
        # data module; confirm against MultivariateSeriesDataModule.
        loss = F.mse_loss(y_pred, y[0])
        # batch_size is passed explicitly because the batch is a dict, not a tensor.
        self.log(f"{stage}_loss", loss, batch_size=encoder_input.shape[0])
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss; logged as ``train_loss``."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Return the validation loss; logged as ``val_loss``."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Return the test loss; logged as ``test_loss``."""
        return self._shared_step(batch, "test")

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return the model output for the batch's ``encoder_cont`` input."""
        x, _ = batch
        return self(x["encoder_cont"])

    def configure_optimizers(self):
        """Use Adam over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

In [20]:
# Two stacked LSTM layers with 256 hidden units, producing a single output value.
model = MultivariateLSTM(
    input_dim=n_vars,
    hidden_dim=256,
    num_layers=2,
    output_dim=1,
)

In [21]:
# Show the module repr to confirm the layer layout and dimensions.
model

MultivariateLSTM(
  (lstm): LSTM(4, 256, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=256, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=1, bias=True)
)

In [22]:
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint, RichProgressBar

# Stop training once val_loss has not improved by at least 1e-4 for 10
# consecutive validation checks.
early_stop_callback = EarlyStopping(
    monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min"
)

# Keep the checkpoint with the lowest val_loss. Without `monitor`, the callback
# only retains the most recent epoch, which after early stopping is `patience`
# epochs past the best one. The filename no longer references `other_metric`,
# which this model never logs, and uses four decimals because the logged losses
# are small enough that `.2f` renders them almost identically.
model_checkpoint = ModelCheckpoint(
    dirpath="multivariate_checkpoints",
    filename="{epoch}-{val_loss:.4f}",
    monitor="val_loss",
    mode="min",
)
rich_progress_bar = RichProgressBar()

In [23]:
# Train for at most 30 epochs; early stopping may end the run sooner.
training_callbacks = [early_stop_callback, model_checkpoint, rich_progress_bar]
trainer = pl.Trainer(max_epochs=30, logger=logger, callbacks=training_callbacks)
trainer.fit(model=model, datamodule=datamodule)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Output()

In [24]:
# Evaluate the in-memory model (current weights) on the data module's test split.
trainer.test(model=model, datamodule=datamodule)

Output()

[{'test_loss': 0.3667064309120178}]

In [25]:
# Re-run validation with the in-memory model to report the final val_loss.
trainer.validate(model, datamodule=datamodule)

Output()

[{'val_loss': 0.006655900273472071}]