
# **Financial Time Series prediction using LSTM and Multivariate Linear Regression**



In [200]:
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from torch.autograd import Variable
from torch.utils.data import Dataset,DataLoader,TensorDataset
from sklearn.preprocessing import MinMaxScaler
import torch.nn.functional as F

##Intro - data exploration, visualization etc

In [201]:
import pandas as pd
#df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/sandp500/all_stocks_5yr.csv', index_col = "Name")
# sp500 = pd.read_csv('ARIMA_Results.csv')
# sp500 = sp500.loc[:,["residuals"]][3000:]
sp500 = pd.read_csv('SP500.csv')
sp500 = sp500.loc[:,["Adj Close"]]
sp500

Unnamed: 0,Adj Close
0,330.220001
1,326.450012
2,321.910004
3,321.000000
4,315.440002
...,...
6750,2553.169922
6751,2557.639893
6752,2559.360107
6753,2561.260010


We'll need this dataset in the end of the work.

In [202]:
sp500_close_prices = sp500.values.astype(float)
print(sp500_close_prices)

[[ 330.220001]
 [ 326.450012]
 [ 321.910004]
 ...
 [2559.360107]
 [2561.26001 ]
 [2562.100098]]


In [203]:
## Data Preprocessing

#Market close data extraction

def create_sequences(data, seq_length):
    x = []
    y = []

    for i in range(len(data)-seq_length-1):
        _x = data[i:(i+seq_length)]
        _y = data[i+seq_length]
        x.append(_x)
        y.append(_y)

    return np.array(x),np.array(y)

#Data normalization
scaler = MinMaxScaler()
training_data = scaler.fit_transform(sp500_close_prices)

seq_length = 16
x, y = create_sequences(training_data, seq_length)

train_size = 5500
validate_size = 500
test_size = len(y) - train_size - validate_size
batch_size_value = 64
dataX = torch.Tensor(np.array(x))
dataY = torch.Tensor(np.array(y))

X_train = torch.Tensor(np.array(x[0:train_size]))
Y_train = torch.Tensor(np.array(y[0:train_size]))

training_dataset = TensorDataset(X_train,Y_train)
train_loader = DataLoader(training_dataset, num_workers=1, shuffle=True,batch_size = batch_size_value,drop_last = True)

X_validate = torch.Tensor(np.array(x[train_size:train_size+validate_size]))
Y_validate = torch.Tensor(np.array(y[train_size:train_size+validate_size]))

validate_dataset = TensorDataset(X_validate,Y_validate)
validate_loader = DataLoader(validate_dataset, num_workers=1, shuffle=True,batch_size = batch_size_value,drop_last = True)

X_test = torch.Tensor(np.array(x[train_size+validate_size:len(x)]))
Y_test = torch.Tensor(np.array(y[train_size+validate_size:len(y)]))
print(X_train[0],Y_train[0])
print(X_train[1],Y_train[1])

tensor([[0.0083],
        [0.0066],
        [0.0046],
        [0.0042],
        [0.0018],
        [0.0015],
        [0.0000],
        [0.0014],
        [0.0017],
        [0.0004],
        [0.0010],
        [0.0021],
        [0.0073],
        [0.0092],
        [0.0087],
        [0.0075]]) tensor([0.0083])
tensor([[0.0066],
        [0.0046],
        [0.0042],
        [0.0018],
        [0.0015],
        [0.0000],
        [0.0014],
        [0.0017],
        [0.0004],
        [0.0010],
        [0.0021],
        [0.0073],
        [0.0092],
        [0.0087],
        [0.0075],
        [0.0083]]) tensor([0.0103])


## Model

In [204]:
class AttnDecoder(nn.Module):

    def __init__(self, code_hidden_size, hidden_size, time_step):
        super(AttnDecoder, self).__init__()
        self.code_hidden_size = code_hidden_size
        self.hidden_size = hidden_size
        self.T = time_step

        self.W = nn.Linear(in_features=2 * hidden_size, out_features=code_hidden_size)
        self.U = nn.Linear(in_features=code_hidden_size, out_features=code_hidden_size)
        self.tanh = nn.Tanh()
        self.V = nn.Linear(in_features=code_hidden_size, out_features=1)
        self.lstm = nn.LSTM(input_size=1, hidden_size=self.hidden_size)
        self.tilde = nn.Linear(in_features=self.code_hidden_size + 1, out_features=1)
        self.fc1 = nn.Linear(in_features=code_hidden_size + hidden_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, encoder_output, y_seq, d , s):
        y_seq = y_seq.transpose(0,1)  #   1*64*1 -->64*1*1
        encoder_output = encoder_output.transpose(0,1)  # [8, 64, 16]->[64, 8, 16]

        # (seq_len, batch,  hidden_size):    
        batch_size = encoder_output.size(0)  
#         d = self.init_variable(1, batch_size, self.hidden_size)#. [1, 64, 16]
#         s = self.init_variable(1, batch_size, self.hidden_size)
        
        ct = self.init_variable(batch_size, self.hidden_size)    # [64, 16]
        
        for t in range(self.T):
            # batch_size * time_step * (encoder_hidden_size + decoder_hidden_size)
            # [time_step, 64, 16] ->[64 , tstep, 16] 
          
            h = encoder_output  # [64, 8, 16]
            d_s = torch.cat((self.embedding_hidden(d), self.embedding_hidden(s)), 2)    # [64 , 1, 16*2]
            z1 = self.W(d_s)  # [64 ,1, 16*2]-> [64 , 1, 16]
            z2 = self.U(h)  # [64, 8, 16]->[64, 8, 16]
            x = z1 + z2  # [64, 1, 16] + [64, 8, 16] = [64, 8, 16]
            z3 = self.V(self.tanh(x))   #   [64, 8, 16]->[64, 8, 1]

            beta_t = F.softmax(z3.view(batch_size, -1), dim=1)
            # yc 64*1*1
            # h [64, 8, 16]

            # [64, 1, 8] * [64, 8, 16] = [64, 1, 16] -> [64, 16]   batch_size * encoder_hidden_size
            ct = torch.bmm(beta_t.unsqueeze(1), h).squeeze(1)  # [64, 16]
            
            y_past = y_seq[:,t,:] # [64, 1]
            yc = torch.cat((y_past, ct), dim=1)  
            y_tilde = self.tilde(yc)
            _, states = self.lstm(y_tilde.unsqueeze(0), (d, s))    # [1, 64, 1]  [1, 64, 16]
            d = states[0]
            
        y_res = self.fc2(self.fc1(torch.cat((d.squeeze(0), ct), dim=1)))
        return y_res

    def init_variable(self, *args):
        zero_tensor = torch.zeros(args)
        if torch.cuda.is_available():
            zero_tensor = zero_tensor.cuda()
        return Variable(zero_tensor)

    def embedding_hidden(self, x):
        return x.repeat(self.T, 1, 1).permute(1, 0, 2)

    
class Encoder(nn.Module):
    
    def __init__(self, input_size, hidden_size,num_layers):
        super().__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size,num_layers)
        
    def forward(self, x):

        
        # Propagate input through LSTM
      
        output, (hidden_out, cell_out) = self.lstm(x)
        
        return output, (hidden_out, cell_out)
    
    
class Decoder(nn.Module):
    
    def __init__(self, input_size, hidden_size,num_layers):
        super().__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
  
        self.lstm = nn.LSTM(input_size, hidden_size,num_layers)
        
        
    def forward(self,x,hidden,cell):
        
        
     
        output, (hidden_out, cell_out) = self.lstm(x, (hidden, cell))
        
        return output, (hidden_out, cell_out)

class Seq2Seq(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,out_dims,attention_flag = False):
        super().__init__()
        
        self.encoder = Encoder(input_size,hidden_size,num_layers)
        if attention_flag == True:
            self.decoder = AttnDecoder(hidden_size, hidden_size, 1)
        if attention_flag == False:
            self.decoder = Decoder(input_size,hidden_size,num_layers)

        self.fc = nn.Linear(hidden_size, out_dims)
        
        
    def forward(self, enc_input, dec_input):
        
        encoder_output,(h_out,c_out) = self.encoder(enc_input)  # torch.Size([64, 8, 1])

        #dec_input [64, 1, 1]     
        # h_out  [1, 64, 16]
        
        if attention_flag == True:
            y_res = self.decoder(encoder_output, dec_input,h_out,c_out)
        if attention_flag == False:
            _, (decoder_h_out, _) = self.decoder(dec_input,h_out,c_out)
            y_res = self.fc(decoder_h_out)

       
        return y_res

    


## Training

In [205]:
num_epochs = 50
learning_rate = 0.005
input_dim = 1
hidden_size = 32
num_layers = 1
output_dim = 1
output_length = 1
attention_flag = True

model = Seq2Seq(input_dim,hidden_size,num_layers,output_dim,attention_flag)

criterion = torch.nn.MSELoss()    # mean-squared error
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model

Seq2Seq(
  (encoder): Encoder(
    (lstm): LSTM(1, 32)
  )
  (decoder): AttnDecoder(
    (W): Linear(in_features=64, out_features=32, bias=True)
    (U): Linear(in_features=32, out_features=32, bias=True)
    (tanh): Tanh()
    (V): Linear(in_features=32, out_features=1, bias=True)
    (lstm): LSTM(1, 32)
    (tilde): Linear(in_features=33, out_features=1, bias=True)
    (fc1): Linear(in_features=64, out_features=32, bias=True)
    (fc2): Linear(in_features=32, out_features=1, bias=True)
  )
  (fc): Linear(in_features=32, out_features=1, bias=True)
)

In [206]:
# Train the model
for epoch in range(num_epochs):
    for data in train_loader:
        X_train,Y_train = data
        Y_train = Y_train.unsqueeze(0)
        
        model.train()
        # decoder 的输入
        decoder_input = torch.zeros(output_length,batch_size_value,output_dim)  # 1,64,1

        #decoder_input = X_train[:,-1,:].unsqueeze(1)
        train_pred = model(X_train.transpose(0,1), decoder_input)
        
        # obtain the loss function
        train_loss = criterion(train_pred, Y_train)
        train_loss.backward(retain_graph=True) #backpropagation
        optimizer.step()
        optimizer.zero_grad()

        model.eval()
        for valdiate_data in validate_loader:
            X_validate,Y_validate = valdiate_data
            #decoder_input = X_validate[:,-1,:].unsqueeze(1)
            validate_pred =  model(X_validate.transpose(0,1),decoder_input)
            validate_loss = criterion(validate_pred,Y_validate)

    if epoch % 1 == 0:
        print("Epoch: %d, train_loss: %1.5f,validate_loss: %1.5f" % (epoch, train_loss,validate_loss))

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 0, train_loss: 0.00011,validate_loss: 0.00033


KeyboardInterrupt: 

## Plotting

In [None]:
model.eval()
train_predict = model(dataX.transpose(0,1),torch.zeros(1,dataX.size(0),1))
#print(train_predict.size(),dataY.size())
train_predict = train_predict.squeeze(0)
data_predict = train_predict.data.numpy()

dataY_plot = dataY.data.numpy()

data_predict = scaler.inverse_transform(data_predict)
dataY_plot = scaler.inverse_transform(dataY_plot)

plt.axvline(x=train_size, c='r', linestyle='--')

plt.plot(dataY_plot, label = "S&P actual value")
plt.plot(data_predict, label = "S&P predicted value")
plt.suptitle('S&P500 Forecasting')
plt.legend()
plt.show()

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

mse = mean_squared_error(dataY_plot[(train_size+validate_size):], data_predict[(train_size+validate_size):])   # 5500  
mae = mean_absolute_error(dataY_plot[(train_size+validate_size):], data_predict[(train_size+validate_size):])

print(f'X_test MSE: {mse}')
print(f'X_test MAE: {mae}')

In [None]:
plt.plot(dataY_plot[(train_size+validate_size):], label = "S&P actual value")
plt.plot(data_predict[(train_size+validate_size):], label = "S&P predicted value")
plt.suptitle('S&P500 Forecasting')
plt.legend()
plt.show()