# Univariate Multi-Step LSTM

In [1]:
#import packages needed
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import math

In [2]:
#Data sequencing function 
def univariate_multi_step(sequence,window_size,n_multistep):
    x, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + window_size
        out_ix = end_ix+n_multistep
        # check if we are beyond the sequence
        if out_ix > len(sequence):
            break
    # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix:out_ix]
        x.append(seq_x)
        y.append(seq_y)
    return np.array(x), np.array(y)

## Hyper parameter

In [46]:
split_ratio = 0.70
num_epochs = 30
n_step = 2 

#seed
torch.manual_seed(123)

<torch._C.Generator at 0x27e96a4c590>

# Data Preparation

In [47]:
#Synthetic Data
univariate_series = np.array([x for x in range(0, 400, 10)])
print(univariate_series.shape)
univariate_series

(40,)


array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120,
       130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250,
       260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380,
       390])

## Data Splitting

In [48]:
split_data = round(len(univariate_series)*split_ratio)
split_data

28

In [49]:
#split data by indexing 
train_data = univariate_series[:split_data]
test_data = univariate_series[split_data:]
print("train_data_shape")
print(train_data.shape)
print("test_data_shape")
print(test_data.shape)

train_data_shape
(28,)
test_data_shape
(12,)


# Data Normalization

In [50]:
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1, 1))

test_data_normalized = scaler.fit_transform(test_data.reshape(-1, 1))

## Data Sequencing

In [51]:
trainX ,trainY =  univariate_multi_step(train_data_normalized,3,n_step)
testX , testY = univariate_multi_step(test_data_normalized,3,n_step)
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testX shape:{testY.shape}")

trainX shape:(24, 3, 1) trainY shape:(24, 2, 1)

testX shape:(8, 3, 1) testX shape:(8, 2, 1)


## Data Transform

In [52]:
#transfrom to Pytorch tensor
trainX = torch.as_tensor(trainX).float()
trainY = torch.as_tensor(trainY).float()
testX = torch.as_tensor(testX).float()
testY = torch.as_tensor(testY).float()
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testY shape:{testY.shape}")

trainX shape:torch.Size([24, 3, 1]) trainY shape:torch.Size([24, 2, 1])

testX shape:torch.Size([8, 3, 1]) testY shape:torch.Size([8, 2, 1])


In [53]:
print(f"Features are now in the shape of {trainX.shape} while labels are now in the shape of {trainY.shape}\n")
print("x-feature\n"+str(trainX.shape[0])+" = total number of data ")
print(str(trainX.shape[1])+" = window size ")
print(str(trainX.shape[2])+" = number of time series\n")
print("y-label\n"+str(trainY.shape[0])+" = number of data")
print(str(trainY.shape[1])+" = number of step\n")

Features are now in the shape of torch.Size([24, 3, 1]) while labels are now in the shape of torch.Size([24, 2, 1])

x-feature
24 = total number of data 
3 = window size 
1 = number of time series

y-label
24 = number of data
2 = number of step



# LSTM Model Configuration

## Vanila LSTM

In [14]:
class LSTM(nn.Module):

        def __init__(self, n_feature, hidden_dim, num_layers, output_dim):
            super(LSTM, self).__init__()

            self.n_feature = n_feature
            # Hidden dimensions
            self.hidden_dim = hidden_dim

            # Number of hidden layers
            self.num_layers = num_layers

            # Building your LSTM
            # batch_first=True causes input/output tensors to be of shape
            # (batch_dim, seq_dim, feature_dim)
            self.lstm = nn.LSTM(n_feature, hidden_dim, num_layers, batch_first=True)

            # Readout layer
            self.fc = nn.Linear(hidden_dim, output_dim)


        def forward(self, x):
            # Initialize hidden state with zeros
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

            # Initialize cell state
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

            # One time step
            # We need to detach as we are doing truncated backpropagation through time (BPTT)
            # If we don't, we'll backprop all the way to the start even after going through another batch
            out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

            # Index hidden state of last time step
            # out.size() --> 100, 28, 100
            # out[:, -1, :] --> 100, 100 --> just want last time step hidden states!
            out = self.fc(out[:, -1, :])
            # out.size() --> 100, 10
            return out

## Bidirectional LSTM

In [15]:
class BidirectionalLSTM(nn.Module):

    def __init__(self, n_feature, hidden_dim, num_layers, output_dim):
        super(BidirectionalLSTM, self).__init__()

        self.n_feature = n_feature
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.num_layers = num_layers

        # Building your LSTM
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(n_feature, hidden_dim, num_layers, batch_first=True,bidirectional=True)

        # Readout layer *2 for bidirectional LSTM
        self.fc = nn.Linear(hidden_dim*2, output_dim)

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_dim).requires_grad_()

        # Initialize cell state
        c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_dim).requires_grad_()

        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

        # Index hidden state of last time step
        out = self.fc(out[:, -1, :])
        
        return out

## Input Model

In [54]:
#Arguments for LSTM model
hidden_dim = 10
number_of_time_series = 1 
timestep = 2

#1 for vanila LSTM , >1 is mean stacked LSTM
num_layers = 1 

#Vanila , Stacked LSTM
model = LSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)

#Bidirectional LSTM
# model = BidirectionalLSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)


In [55]:
#loss function 
loss_fn = torch.nn.MSELoss()

#optimiser
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

## Training

In [56]:
for t in range(num_epochs):
    # Initialise hidden state
#     Don't do this if you want your LSTM to be stateful
#     model.hidden = model.init_hidden()

    # Forward pass
    y_train_pred = model(trainX)
    
    #Reshape to perform MSE 
    y_train_pred=torch.reshape(y_train_pred,(trainY.shape[0],trainY.shape[1],1))

    loss = loss_fn(y_train_pred, trainY)
    print("Epoch ", t, "MSE: ", loss.item())

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

Epoch  0 MSE:  0.2841896712779999
Epoch  1 MSE:  0.27279722690582275
Epoch  2 MSE:  0.26188135147094727
Epoch  3 MSE:  0.2514563500881195
Epoch  4 MSE:  0.24142096936702728
Epoch  5 MSE:  0.23165695369243622
Epoch  6 MSE:  0.22206814587116241
Epoch  7 MSE:  0.21257181465625763
Epoch  8 MSE:  0.20303982496261597
Epoch  9 MSE:  0.19325660169124603
Epoch  10 MSE:  0.18291497230529785
Epoch  11 MSE:  0.1716640740633011
Epoch  12 MSE:  0.15920329093933105
Epoch  13 MSE:  0.14536152780056
Epoch  14 MSE:  0.13013184070587158
Epoch  15 MSE:  0.11367560178041458
Epoch  16 MSE:  0.09631427377462387
Epoch  17 MSE:  0.07851607352495193
Epoch  18 MSE:  0.06088617444038391
Epoch  19 MSE:  0.04417113587260246
Epoch  20 MSE:  0.029273075982928276
Epoch  21 MSE:  0.017233025282621384
Epoch  22 MSE:  0.009126913733780384
Epoch  23 MSE:  0.005811700597405434
Epoch  24 MSE:  0.007445821072906256
Epoch  25 MSE:  0.012896780855953693
Epoch  26 MSE:  0.019637469202280045
Epoch  27 MSE:  0.024817274883389473


## Prediction

In [57]:
# make predictions
y_test_pred = model(testX)

#Reshape to original data
y_train_pred = torch.reshape(y_train_pred,(y_train_pred.shape[0],y_train_pred.shape[1]))
trainY = torch.reshape(trainY,(trainY.shape[0],trainY.shape[1]))
y_test_pred = torch.reshape(y_test_pred,(y_test_pred.shape[0],y_test_pred.shape[1]))
testY = torch.reshape(testY,(testY.shape[0],testY.shape[1]))

In [58]:
#Invert predictions
y_train_pred = scaler.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler.inverse_transform(trainY.detach().numpy())
y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler.inverse_transform(testY.detach().numpy())

In [59]:
print("y-test\t\t\t\ty-predict")
for i in range(len(y_test_pred)):
    print(f"{y_test[i]}\t\t{y_test_pred[i]}")

y-test				y-predict
[310. 320.]		[289.47516 288.94534]
[320. 330.]		[299.89438 298.6619 ]
[330. 340.]		[313.38794 311.63916]
[340. 350.]		[329.2237  327.36945]
[350. 360.]		[345.31842 343.9561 ]
[360. 370.]		[359.2459  359.01337]
[370. 380.]		[369.6973  371.06735]
[380. 390.]		[376.79083 379.96777]


In [60]:
print(f"y_test_shape : {y_test.shape}")
print(f"y_test_pred_shape : {y_test_pred.shape}")

y_test_shape : (8, 2)
y_test_pred_shape : (8, 2)


In [61]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 8.26 RMSE
Test Score: 12.50 RMSE


## Exercise for Univariate (Solution)

In [68]:
#Hyperparameter
num_epochs_shampoo = 100
split_ratio = 0.70
n_step = 2
#Hidden Layer for LSTM
hidden_dim = 32

#seed
torch.manual_seed(123)

<torch._C.Generator at 0x27e96a4c590>

In [63]:
shampoo = pd.read_csv('../datasets/others/shampoo-sales.csv')
shampoo_ts =shampoo['sales']
shampoo_ts.head() 

0    266.0
1    145.9
2    183.1
3    119.3
4    180.3
Name: sales, dtype: float64

In [64]:
#split data by indexing 
split_data = round(len(shampoo_ts)*split_ratio)
split_data

25

In [65]:
train_data_shampoo = shampoo_ts[:split_data]
test_data_shampoo = shampoo_ts[split_data:]
print("train_data_shampoo_shape")
print(train_data_shampoo.shape)
print("test_data_shampoo_shape")
print(test_data_shampoo.shape)

train_data_shampoo_shape
(25,)
test_data_shampoo_shape
(11,)


In [66]:
#Data Normalization

#Reshape before normalize
train_data_shampoo = train_data_shampoo.values.reshape(-1, 1)
test_data_shampoo = test_data_shampoo.values.reshape((-1, 1))

#Build Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized_shampoo = scaler.fit_transform(train_data_shampoo)

test_data_normalized_shampoo = scaler.fit_transform(test_data_shampoo)
train_data_normalized_shampoo[1:6]

array([[-0.82401588],
       [-0.57790275],
       [-1.        ],
       [-0.59642739],
       [-0.67449553]])

In [69]:
#Data Sequencing 
trainX_shampoo ,trainY_shampoo =  univariate_multi_step(train_data_normalized_shampoo,2,n_step)
testX_shampoo , testY_shampoo = univariate_multi_step(test_data_normalized_shampoo,2,n_step)
print(f"trainX shape:{trainX_shampoo.shape} trainY shape:{trainY_shampoo.shape}\n")
print(f"testX shape:{testX_shampoo.shape} testY shape:{testY_shampoo.shape}")



trainX shape:(22, 2, 1) trainY shape:(22, 2, 1)

testX shape:(8, 2, 1) testY shape:(8, 2, 1)


In [70]:
#Transfrom numpy to Pytorch tensor
trainX_shampoo = torch.as_tensor(trainX_shampoo).float()
trainY_shampoo = torch.as_tensor(trainY_shampoo).float()
testX_shampoo = torch.as_tensor(testX_shampoo).float()
testY_shampoo = torch.as_tensor(testY_shampoo).float()
print(f"trainX shape:{trainX_shampoo.shape} trainY shape:{trainY_shampoo.shape}\n")
print(f"testX shape:{testX_shampoo.shape} testX shape:{testY_shampoo.shape}")


trainX shape:torch.Size([22, 2, 1]) trainY shape:torch.Size([22, 2, 1])

testX shape:torch.Size([8, 2, 1]) testX shape:torch.Size([8, 2, 1])


In [71]:
#Arguments for LSTM model
number_of_time_series = 1 
timestep = 2
output_dim =1 
#1 for vanila LSTM , >1 is mean stacked LSTM
num_layers = 1

#Vanila ,Stacked LSTM
# model_shampoo = LSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)

#Bidirectional LSTM
model_shampoo = BidirectionalLSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)
model_shampoo.float()

BidirectionalLSTM(
  (lstm): LSTM(1, 32, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=64, out_features=2, bias=True)
)

In [72]:
loss_fn_shampoo = torch.nn.MSELoss()

optimiser_shampoo = torch.optim.Adam(model_shampoo.parameters(), lr=0.01)

In [74]:
for t in range(num_epochs_shampoo):
    # Initialise hidden state
    # Don't do this if you want your LSTM to be stateful
    # model.hidden = model.init_hidden()

    # Forward pass
    y_train_pred_shampoo = model_shampoo(trainX_shampoo)
    
    #Reshape to perform MSE 
    y_train_pred_shampoo=torch.reshape(y_train_pred_shampoo,(trainY_shampoo.shape[0],trainY_shampoo.shape[1],1))
    
    loss_shampoo = loss_fn_shampoo(y_train_pred_shampoo, trainY_shampoo)
    print("Epoch ", t, "MSE: ", loss_shampoo.item())

    # Zero out gradient, else they will accumulate between epochs
    optimiser_shampoo.zero_grad()

    # Backward pass
    loss_shampoo.backward()

    # Update parameters
    optimiser_shampoo.step()

Epoch  0 MSE:  0.31683558225631714
Epoch  1 MSE:  0.28915512561798096
Epoch  2 MSE:  0.26710045337677
Epoch  3 MSE:  0.24960164725780487
Epoch  4 MSE:  0.23722191154956818
Epoch  5 MSE:  0.23146137595176697
Epoch  6 MSE:  0.23219609260559082
Epoch  7 MSE:  0.23401685059070587
Epoch  8 MSE:  0.23180030286312103
Epoch  9 MSE:  0.22552178800106049
Epoch  10 MSE:  0.21760624647140503
Epoch  11 MSE:  0.21034663915634155
Epoch  12 MSE:  0.20492471754550934
Epoch  13 MSE:  0.2013656198978424
Epoch  14 MSE:  0.19887572526931763
Epoch  15 MSE:  0.19627250730991364
Epoch  16 MSE:  0.19260649383068085
Epoch  17 MSE:  0.18782959878444672
Epoch  18 MSE:  0.18294648826122284
Epoch  19 MSE:  0.17950576543807983
Epoch  20 MSE:  0.17850029468536377
Epoch  21 MSE:  0.1790970265865326
Epoch  22 MSE:  0.1791568100452423
Epoch  23 MSE:  0.17787708342075348
Epoch  24 MSE:  0.17662560939788818
Epoch  25 MSE:  0.17686747014522552
Epoch  26 MSE:  0.17790760099887848
Epoch  27 MSE:  0.17781467735767365
Epoch  2

In [75]:
#Make predictions
y_test_pred_shampoo = model_shampoo(testX_shampoo)

#Reshape to original data
y_train_pred_shampoo = torch.reshape(y_train_pred_shampoo,(y_train_pred_shampoo.shape[0],y_train_pred_shampoo.shape[1]))
trainY_shampoo = torch.reshape(trainY_shampoo,(trainY_shampoo.shape[0],trainY_shampoo.shape[1]))
y_test_pred_shampoo = torch.reshape(y_test_pred_shampoo,(y_test_pred_shampoo.shape[0],y_test_pred_shampoo.shape[1]))
testY_shampoo = torch.reshape(testY_shampoo,(testY_shampoo.shape[0],testY_shampoo.shape[1]))

In [76]:
#Invert predictions
y_train_pred_shampoo = scaler.inverse_transform(y_train_pred_shampoo.detach().numpy())
y_train_shampoo = scaler.inverse_transform(trainY_shampoo.detach().numpy())
y_test_pred_shampoo = scaler.inverse_transform(y_test_pred_shampoo.detach().numpy())
y_test_shampoo = scaler.inverse_transform(testY_shampoo.detach().numpy())

In [77]:
y_test_shampoo.shape

(8, 2)

In [79]:
print("y-test\t\t\ty-predict")
for i in range(len(y_test_shampoo)):
    print(f"{y_test_shampoo[i]}\t\t{y_test_pred_shampoo[i]}")

y-test			y-predict
[439.3     401.30002]		[421.08847 380.45023]
[401.30002 437.4    ]		[390.06454 398.96976]
[437.4 575.5]		[440.98538 463.0719 ]
[575.5     407.60004]		[428.87927 476.70508]
[407.60004 682.     ]		[476.0935  554.02496]
[682.  475.3]		[496.86307 401.22412]
[475.3 581.3]		[454.43622 527.33356]
[581.3 646.9]		[582.9071  532.88525]


In [80]:
print(f"y_test_shape : {y_test_shampoo.shape}")   
print(f"y_test_pred_shape : {y_test_pred_shampoo.shape}")

y_test_shape : (8, 2)
y_test_pred_shape : (8, 2)


In [81]:
#calculate root mean squared error
trainScore_shampoo = math.sqrt(mean_squared_error(y_train_shampoo[:,0], y_train_pred_shampoo[:,0]))
print('Train Score: %.2f RMSE' % (trainScore_shampoo))
testScore_shampoo = math.sqrt(mean_squared_error(y_test_shampoo[:,0], y_test_pred_shampoo[:,0]))
print('Test Score: %.2f RMSE' % (testScore_shampoo))

Train Score: 71.36 RMSE
Test Score: 87.59 RMSE
