# Univariate LSTM

In [149]:
#import packages needed
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import math

In [150]:
#Data sequencing function 
def univariate_single_step(sequence, window_size):
    x, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + window_size
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
    # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        x.append(seq_x)
        y.append(seq_y)
    return np.array(x), np.array(y)


## Hyper parameter

In [151]:
split_ratio = 0.70
num_epochs = 60


#seed
torch.manual_seed(123)

<torch._C.Generator at 0x22c72a2b590>

# Data Preparation

In [152]:
#Synthetic Data
univariate_series = np.array([x for x in range(0, 400, 10)])
print(univariate_series.shape)
univariate_series

(40,)


array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120,
       130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250,
       260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380,
       390])

## Data Splitting

In [153]:
split_data = round(len(univariate_series)*split_ratio)
split_data

28

In [154]:
#split data by indexing 
train_data = univariate_series[:split_data]
test_data = univariate_series[split_data:]
print("train_data_shape")
print(train_data.shape)
print("test_data_shape")
print(test_data.shape)

train_data_shape
(28,)
test_data_shape
(12,)


## Data Normalization

In [155]:
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1, 1))

test_data_normalized = scaler.fit_transform(test_data.reshape(-1, 1))

## Data Sequencing

In [156]:
trainX ,trainY =  univariate_single_step(train_data_normalized,3)
testX , testY = univariate_single_step(test_data_normalized,3)
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testX shape:{testX.shape}")


trainX shape:(25, 3, 1) trainY shape:(25, 1)

testX shape:(9, 3, 1) testX shape:(9, 3, 1)


## Data Transform

In [157]:
#transfrom to Pytorch tensor
trainX = torch.as_tensor(trainX).float()
trainY = torch.as_tensor(trainY).float()
testX = torch.as_tensor(testX).float()
testY = torch.as_tensor(testY).float()
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testX shape:{testX.shape}")
trainX

trainX shape:torch.Size([25, 3, 1]) trainY shape:torch.Size([25, 1])

testX shape:torch.Size([9, 3, 1]) testX shape:torch.Size([9, 3, 1])


tensor([[[-1.0000],
         [-0.9259],
         [-0.8519]],

        [[-0.9259],
         [-0.8519],
         [-0.7778]],

        [[-0.8519],
         [-0.7778],
         [-0.7037]],

        [[-0.7778],
         [-0.7037],
         [-0.6296]],

        [[-0.7037],
         [-0.6296],
         [-0.5556]],

        [[-0.6296],
         [-0.5556],
         [-0.4815]],

        [[-0.5556],
         [-0.4815],
         [-0.4074]],

        [[-0.4815],
         [-0.4074],
         [-0.3333]],

        [[-0.4074],
         [-0.3333],
         [-0.2593]],

        [[-0.3333],
         [-0.2593],
         [-0.1852]],

        [[-0.2593],
         [-0.1852],
         [-0.1111]],

        [[-0.1852],
         [-0.1111],
         [-0.0370]],

        [[-0.1111],
         [-0.0370],
         [ 0.0370]],

        [[-0.0370],
         [ 0.0370],
         [ 0.1111]],

        [[ 0.0370],
         [ 0.1111],
         [ 0.1852]],

        [[ 0.1111],
         [ 0.1852],
         [ 0.2593]],

        

In [158]:
print(f"Features are now in the shape of {trainX.shape} while labels are now in the shape of {trainY.shape}\n")
print("x-feature\n"+str(trainX.shape[0])+" = total number of data ")
print(str(trainX.shape[1])+" = window size ")
print(str(trainX.shape[2])+" = number of time series\n")
print("y-label\n"+str(trainY.shape[0])+" = number of data")
print(str(trainY.shape[1])+" = number of step\n")

Features are now in the shape of torch.Size([25, 3, 1]) while labels are now in the shape of torch.Size([25, 1])

x-feature
25 = total number of data 
3 = window size 
1 = number of time series

y-label
25 = number of data
1 = number of step



# LSTM Model Configuration 

## Vanila LSTM

In [159]:
class LSTM(nn.Module):

        def __init__(self, n_feature, hidden_dim, num_layers, output_dim):
            super(LSTM, self).__init__()

            self.n_feature = n_feature
            # Hidden dimensions
            self.hidden_dim = hidden_dim

            # Number of hidden layers
            self.num_layers = num_layers

            # Building your LSTM
            # batch_first=True causes input/output tensors to be of shape
            # (batch_dim, seq_dim, feature_dim)
            self.lstm = nn.LSTM(n_feature, hidden_dim, num_layers, batch_first=True)

            # Readout layer
            self.fc = nn.Linear(hidden_dim, output_dim)


        def forward(self, x):
            # Initialize hidden state with zeros
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

            # Initialize cell state
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

            # One time step
            # We need to detach as we are doing truncated backpropagation through time (BPTT)
            # If we don't, we'll backprop all the way to the start even after going through another batch
            out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

            # Index hidden state of last time step
            # out.size() --> 100, 28, 100
            # out[:, -1, :] --> 100, 100 --> just want last time step hidden states!
            out = self.fc(out[:, -1, :])
            # out.size() --> 100, 10
            return out

## Bidirectional LSTM

In [160]:
class BidirectionalLSTM(nn.Module):

    def __init__(self, n_feature, hidden_dim, num_layers, output_dim):
        super(BidirectionalLSTM, self).__init__()

        self.n_feature = n_feature
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.num_layers = num_layers

        # Building your LSTM
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(n_feature, hidden_dim, num_layers, batch_first=True,bidirectional=True)

        # Readout layer *2 for bidirectional LSTM
        self.fc = nn.Linear(hidden_dim*2, output_dim)

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers*2, x.shape(0), self.hidden_dim).requires_grad_()

        # Initialize cell state
        c0 = torch.zeros(self.num_layers*2, x.shape(0), self.hidden_dim).requires_grad_()

        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

        # Index hidden state of last time step
        out = self.fc(out[:, -1, :])
        
        return out

## Input Model

In [161]:
#Arguments for LSTM model
hidden_dim = 10
number_of_time_series = 1 
timestep = 1
output_dim =1 

#1 for vanila LSTM , >1 is mean stacked LSTM
num_layers = 3 

#model for vanila ,stack
model = LSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)
#model for bidirectional
# model = BidirectionalLSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)
model.float()

LSTM(
  (lstm): LSTM(1, 10, num_layers=3, batch_first=True)
  (fc): Linear(in_features=10, out_features=1, bias=True)
)

In [162]:
#loss function 
loss_fn = torch.nn.MSELoss()

#optimiser
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

## Training

In [163]:
for t in range(num_epochs):
    # Initialise hidden state
#     Don't do this if you want your LSTM to be stateful
#     model.hidden = model.init_hidden()

    # Forward pass
    y_train_pred = model(trainX.float())
    # print("after transform y_train_pred.shape"+str(y_train_pred.shape))

    loss = loss_fn(y_train_pred, trainY)
    print("Epoch ", t, "MSE: ", loss.item())

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

Epoch  0 MSE:  0.3688581585884094
Epoch  1 MSE:  0.34376877546310425
Epoch  2 MSE:  0.32158589363098145
Epoch  3 MSE:  0.3028506338596344
Epoch  4 MSE:  0.2886413633823395
Epoch  5 MSE:  0.28067171573638916
Epoch  6 MSE:  0.28030386567115784
Epoch  7 MSE:  0.2850654721260071
Epoch  8 MSE:  0.2872196435928345
Epoch  9 MSE:  0.2835320234298706
Epoch  10 MSE:  0.2761363387107849
Epoch  11 MSE:  0.26781147718429565
Epoch  12 MSE:  0.260130912065506
Epoch  13 MSE:  0.25334447622299194
Epoch  14 MSE:  0.24684424698352814
Epoch  15 MSE:  0.23962406814098358
Epoch  16 MSE:  0.23050367832183838
Epoch  17 MSE:  0.2181830257177353
Epoch  18 MSE:  0.20141609013080597
Epoch  19 MSE:  0.17948806285858154
Epoch  20 MSE:  0.15290845930576324
Epoch  21 MSE:  0.12405817955732346
Epoch  22 MSE:  0.09670815616846085
Epoch  23 MSE:  0.07285042107105255
Epoch  24 MSE:  0.0527329258620739
Epoch  25 MSE:  0.037791356444358826
Epoch  26 MSE:  0.029768405482172966
Epoch  27 MSE:  0.030134662985801697
Epoch  28 

## Prediction

In [164]:
# make predictions
y_test_pred = model(testX)

#Reshape to original data
y_train_pred = torch.reshape(y_train_pred,(y_train_pred.shape[0],y_train_pred.shape[1]))
trainY = torch.reshape(trainY,(trainY.shape[0],trainY.shape[1]))
y_test_pred = torch.reshape(y_test_pred,(y_test_pred.shape[0],y_test_pred.shape[1]))
testY = torch.reshape(testY,(testY.shape[0],testY.shape[1]))

In [165]:
#Invert predictions
y_train_pred = scaler.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler.inverse_transform(trainY.detach().numpy())
y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler.inverse_transform(testY.detach().numpy())

In [166]:
print(f"y_test_shape : {y_test.shape}")
y_test      

y_test_shape : (9, 1)


array([[310.],
       [320.],
       [330.],
       [340.],
       [350.],
       [360.],
       [370.],
       [380.],
       [390.]], dtype=float32)

In [167]:
print(f"y_test_pred_shape : {y_test_pred.shape}")
y_test_pred

y_test_pred_shape : (9, 1)


array([[303.18906],
       [306.51974],
       [312.10962],
       [321.66388],
       [335.0034 ],
       [348.56177],
       [359.56076],
       [367.68277],
       [373.56143]], dtype=float32)

In [168]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 4.72 RMSE
Test Score: 14.03 RMSE


## Exercise for Univariate (Solution)

Using LSTM to create a model that can prdict the sales of shampoo for last 3 month

In [118]:
shampoo = pd.read_csv('../datasets/others/shampoo-sales.csv')
shampoo_ts = pd.Series(shampoo['sales'].values)
shampoo_ts.head() 
                            

0    266.0
1    145.9
2    183.1
3    119.3
4    180.3
dtype: float64

In [133]:
#split data by indexing 
split_data = 20
train_data_shampoo = shampoo_ts[:-split_data]
test_data_shampoo = shampoo_ts[-split_data:]
print("train_data_shape")
print(train_data.shape)
print("test_data_shape")
print(test_data.shape)
train_data_shampoo

train_data_shape
(20,)
test_data_shape
(20,)


0     266.0
1     145.9
2     183.1
3     119.3
4     180.3
5     168.5
6     231.8
7     224.5
8     192.8
9     122.9
10    336.5
11    185.9
12    194.3
13    149.5
14    210.1
15    273.3
dtype: float64

In [169]:
#Data Normalization
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized_shampoo = scaler.fit_transform(train_data_shampoo.reshape(-1, 1))

test_data_normalized_shampoo = scaler.fit_transform(test_data_shampoo.reshape(-1, 1))

AttributeError: 'Series' object has no attribute 'reshape'

In [135]:
#Data Sequencing 
trainX_shampoo ,trainY_shampoo =  univariate_single_step(train_data_normalized_shampoo,2)
testX_shampoo , testY_shampoo = univariate_single_step(test_data_normalized_shampoo,2)
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testX shape:{testX.shape}")
trainX

trainX shape:torch.Size([18, 2, 1]) trainY shape:torch.Size([18, 1])

testX shape:torch.Size([18, 2, 1]) testX shape:torch.Size([18, 2, 1])


tensor([[[-1.0000],
         [-0.8947]],

        [[-0.8947],
         [-0.7895]],

        [[-0.7895],
         [-0.6842]],

        [[-0.6842],
         [-0.5789]],

        [[-0.5789],
         [-0.4737]],

        [[-0.4737],
         [-0.3684]],

        [[-0.3684],
         [-0.2632]],

        [[-0.2632],
         [-0.1579]],

        [[-0.1579],
         [-0.0526]],

        [[-0.0526],
         [ 0.0526]],

        [[ 0.0526],
         [ 0.1579]],

        [[ 0.1579],
         [ 0.2632]],

        [[ 0.2632],
         [ 0.3684]],

        [[ 0.3684],
         [ 0.4737]],

        [[ 0.4737],
         [ 0.5789]],

        [[ 0.5789],
         [ 0.6842]],

        [[ 0.6842],
         [ 0.7895]],

        [[ 0.7895],
         [ 0.8947]]])

In [136]:
#Transfrom to Pytorch tensor
trainX_shampoo = torch.as_tensor(trainX_shampoo).float()
trainY_shampoo = torch.as_tensor(trainY_shampoo).float()
testX_shampoo = torch.as_tensor(testX_shampoo).float()
testY_shampoo = torch.as_tensor(testY_shampoo).float()
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testX shape:{testX.shape}")
trainX

trainX shape:torch.Size([18, 2, 1]) trainY shape:torch.Size([18, 1])

testX shape:torch.Size([18, 2, 1]) testX shape:torch.Size([18, 2, 1])


tensor([[[-1.0000],
         [-0.8947]],

        [[-0.8947],
         [-0.7895]],

        [[-0.7895],
         [-0.6842]],

        [[-0.6842],
         [-0.5789]],

        [[-0.5789],
         [-0.4737]],

        [[-0.4737],
         [-0.3684]],

        [[-0.3684],
         [-0.2632]],

        [[-0.2632],
         [-0.1579]],

        [[-0.1579],
         [-0.0526]],

        [[-0.0526],
         [ 0.0526]],

        [[ 0.0526],
         [ 0.1579]],

        [[ 0.1579],
         [ 0.2632]],

        [[ 0.2632],
         [ 0.3684]],

        [[ 0.3684],
         [ 0.4737]],

        [[ 0.4737],
         [ 0.5789]],

        [[ 0.5789],
         [ 0.6842]],

        [[ 0.6842],
         [ 0.7895]],

        [[ 0.7895],
         [ 0.8947]]])

In [137]:
#Arguments for LSTM model
hidden_dim = 32
number_of_time_series = 1 
timestep = 1
output_dim =1 
#1 for vanila LSTM , >1 is mean stacked LSTM
num_layers = 1

#model for vanila ,stack
model = LSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)
# model = BidirectionalLSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)
model.float()


LSTM(
  (lstm): LSTM(1, 32, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)

In [138]:
for t in range(num_epochs):
    # Initialise hidden state
    # Don't do this if you want your LSTM to be stateful
    # model.hidden = model.init_hidden()

    # Forward pass
    y_train_pred_shampoo = model(trainX_shampoo.float())
    # print("after transform y_train_pred.shape"+str(y_train_pred.shape))

    loss = loss_fn(y_train_pred_shampoo, trainY_shampoo)
    print("Epoch ", t, "MSE: ", loss.item())

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

Epoch  0 MSE:  0.30704465508461
Epoch  1 MSE:  0.30704465508461
Epoch  2 MSE:  0.30704465508461
Epoch  3 MSE:  0.30704465508461
Epoch  4 MSE:  0.30704465508461
Epoch  5 MSE:  0.30704465508461
Epoch  6 MSE:  0.30704465508461
Epoch  7 MSE:  0.30704465508461
Epoch  8 MSE:  0.30704465508461
Epoch  9 MSE:  0.30704465508461
Epoch  10 MSE:  0.30704465508461
Epoch  11 MSE:  0.30704465508461
Epoch  12 MSE:  0.30704465508461
Epoch  13 MSE:  0.30704465508461
Epoch  14 MSE:  0.30704465508461
Epoch  15 MSE:  0.30704465508461
Epoch  16 MSE:  0.30704465508461
Epoch  17 MSE:  0.30704465508461
Epoch  18 MSE:  0.30704465508461
Epoch  19 MSE:  0.30704465508461
Epoch  20 MSE:  0.30704465508461
Epoch  21 MSE:  0.30704465508461
Epoch  22 MSE:  0.30704465508461
Epoch  23 MSE:  0.30704465508461
Epoch  24 MSE:  0.30704465508461
Epoch  25 MSE:  0.30704465508461
Epoch  26 MSE:  0.30704465508461
Epoch  27 MSE:  0.30704465508461
Epoch  28 MSE:  0.30704465508461
Epoch  29 MSE:  0.30704465508461
Epoch  30 MSE:  0.30

In [139]:
# make predictions
y_test_pred_shampoo = model(testX_shampoo)

#Reshape to original data
y_train_pred_shampoo = torch.reshape(y_train_pred_shampoo,(y_train_pred_shampoo.shape[0],y_train_pred_shampoo.shape[1]))
trainY_shampoo = torch.reshape(trainY_shampoo,(trainY_shampoo.shape[0],trainY_shampoo.shape[1]))
y_test_pred_shampoo = torch.reshape(y_test_pred_shampoo,(y_test_pred_shampoo.shape[0],y_test_pred_shampoo.shape[1]))
testY_shampoo = torch.reshape(testY,(testY_shampoo.shape[0],testY_shampoo.shape[1]))

In [140]:
#Invert predictions
y_train_pred_shampoo = scaler.inverse_transform(y_train_pred_shampoo.detach().numpy())
y_train_shampoo = scaler.inverse_transform(trainY_shampoo.detach().numpy())
y_test_pred_shampoo = scaler.inverse_transform(y_test_pred_shampoo.detach().numpy())
y_test_shampoo = scaler.inverse_transform(testY_shampoo.detach().numpy())

NotFittedError: This MinMaxScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [141]:
print(f"y_test_shape : {y_test_shampoo.shape}")
y_test_shampoo      

y_test_shape : (18, 1)


array([[220.     ],
       [230.     ],
       [239.99998],
       [250.     ],
       [260.     ],
       [270.     ],
       [280.     ],
       [290.     ],
       [300.     ],
       [310.     ],
       [320.     ],
       [330.     ],
       [340.     ],
       [350.     ],
       [360.     ],
       [370.     ],
       [380.     ],
       [390.     ]], dtype=float32)

In [130]:
print(f"y_test_pred_shape : {y_test_pred_shampoo.shape}")
y_test_pred_shampoo

y_test_pred_shape : (18, 1)


array([[289.69675],
       [289.72968],
       [289.7661 ],
       [289.8059 ],
       [289.84912],
       [289.8956 ],
       [289.94525],
       [289.99792],
       [290.05356],
       [290.11194],
       [290.17294],
       [290.2364 ],
       [290.3021 ],
       [290.3699 ],
       [290.4395 ],
       [290.51077],
       [290.58347],
       [290.6573 ]], dtype=float32)

In [131]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 57.68 RMSE
Test Score: 57.68 RMSE
