# LSTM for Multivariate Time Series

In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch import optim

## Load Data

In [2]:
# Load the demo price frame (Open/High/Low/Close/Volume indexed by Date)
# from a local pickle. Unpickling can execute arbitrary code, so only point
# this at files that come from a trusted source.
df: pd.DataFrame = pd.read_pickle("../../data/demo.pkl")
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-11-02,41.650002,42.125000,41.320000,42.027500,1.655736e+08
2017-11-03,43.500000,43.564999,42.779999,43.125000,2.375944e+08
2017-11-04,43.364166,43.625833,42.829999,43.270833,2.050980e+08
2017-11-05,43.228333,43.686667,42.880000,43.416667,1.726016e+08
2017-11-06,43.092499,43.747501,42.930000,43.562500,1.401052e+08
...,...,...,...,...,...
2022-10-28,148.199997,157.500000,147.820007,155.740005,1.647624e+08
2022-10-29,149.853333,156.413335,149.186671,154.940002,1.424893e+08
2022-10-30,151.506668,155.326670,150.553335,154.139999,1.202163e+08
2022-10-31,153.160004,154.240005,151.919998,153.339996,9.794320e+07


In [12]:
# Inspect the column labels available in the frame.
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')

## Encode Fields in a Data Frame

In [15]:
from sklearn.preprocessing import LabelEncoder

# `fit` returns the encoder itself, so construction and fitting can be bound
# in one step. After fitting, `classes_` holds the column names in sorted
# order and each name's integer code is its position in that array.
le = LabelEncoder().fit(df.columns)
le.classes_

array(['Close', 'High', 'Low', 'Open', 'Volume'], dtype=object)

In [16]:
# Map an integer code back to the column name it stands for.
le.inverse_transform([1])

array(['High'], dtype=object)

In [17]:
# Select the columns in the encoder's class order before converting to an
# array, so that column i of X is the field whose encoded value is i.
X = df[le.classes_].to_numpy()
X

array([[4.20275002e+01, 4.21250000e+01, 4.13199997e+01, 4.16500015e+01,
        1.65573600e+08],
       [4.31250000e+01, 4.35649986e+01, 4.27799988e+01, 4.35000000e+01,
        2.37594400e+08],
       [4.32708333e+01, 4.36258329e+01, 4.28299993e+01, 4.33641663e+01,
        2.05098000e+08],
       ...,
       [1.54139999e+02, 1.55326670e+02, 1.50553335e+02, 1.51506668e+02,
        1.20216267e+08],
       [1.53339996e+02, 1.54240005e+02, 1.51919998e+02, 1.53160004e+02,
        9.79432000e+07],
       [1.50649994e+02, 1.55449997e+02, 1.49130005e+02, 1.55080002e+02,
        8.03214000e+07]])

In [20]:
# Pick columns out of the raw array by field name: `transform` turns the
# names into their integer codes, which are the column positions in X.
X[:, le.transform(["Close", "Open", "High"])]

array([[ 42.02750015,  41.65000153,  42.125     ],
       [ 43.125     ,  43.5       ,  43.56499863],
       [ 43.27083333,  43.36416626,  43.62583288],
       ...,
       [154.13999939, 151.50666809, 155.32667033],
       [153.33999634, 153.16000366, 154.24000549],
       [150.6499939 , 155.08000183, 155.44999695]])

In [22]:
# The same three fields selected by label from the frame, as a cross-check
# of the code-based indexing into X.
df[["Close", "Open", "High"]]

Unnamed: 0_level_0,Close,Open,High
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-11-02,42.027500,41.650002,42.125000
2017-11-03,43.125000,43.500000,43.564999
2017-11-04,43.270833,43.364166,43.625833
2017-11-05,43.416667,43.228333,43.686667
2017-11-06,43.562500,43.092499,43.747501
...,...,...,...
2022-10-28,155.740005,148.199997,157.500000
2022-10-29,154.940002,149.853333,156.413335
2022-10-30,154.139999,151.506668,155.326670
2022-10-31,153.339996,153.160004,154.240005


In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear read-out of the final hidden state.

    The network consumes sequences shaped ``(N, L, input_size)`` (batch
    first) and emits one prediction of width ``output_size`` per sequence.
    An unbatched sequence shaped ``(L, input_size)`` is accepted as well and
    yields a single ``(output_size,)`` prediction.

    Args:
        input_size: Number of features at every time step.
        hidden_size: Width of the hidden state in each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sequence.
    """

    def __init__(
        self,
        input_size: int, hidden_size: int,
        num_layers: int, output_size: int
    ):
        # initialize super class
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size

        # LSTM layer (batch dimension comes first in its inputs/outputs)
        self.lstm = nn.LSTM(
            self.input_size,
            self.hidden_size,
            self.num_layers,
            batch_first=True,
        )

        # fully connected layer mapping a hidden state to the prediction
        self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Predict from the last hidden state of the top LSTM layer.

        Args:
            x: Input of shape ``(N, L, input_size)``, where N is the batch
                size and L the sequence length, or ``(L, input_size)`` for a
                single unbatched sequence.

        Returns:
            Tensor of shape ``(N, output_size)``, or ``(output_size,)`` for
            unbatched input.
        """
        # Invoke the sub-modules themselves rather than their ``forward``
        # methods: ``Module.__call__`` is what runs registered hooks, and
        # calling ``forward`` directly silently skips them.
        #
        # No initial state is supplied, so the LSTM starts from zeros.
        # h_n has shape (num_layers, N, H) with H the hidden size; the
        # per-step outputs and the cell state are not needed here.
        _, (h_n, _) = self.lstm(x)

        # Final hidden state of the last (top) layer. Indexing only the
        # leading axis keeps this valid for unbatched input too, where h_n
        # is (num_layers, H).
        last_hidden = h_n[-1]

        # get predicted value from the fully connected layer
        return self.fc(last_hidden)

In [24]:
# Two small arrays that share the same number of columns.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6]])

# Append B's row beneath A's rows; axis 0 is the default, spelled out here.
np.concatenate((A, B), axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

TypeError: can only concatenate list (not "int") to list