TO DO : 
- add prediction graph
- add loss/epoch graph

# Univariate LSTM

In [360]:
#import packages needed
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import math

In [361]:
#Data sequencing function 
def univariate_single_step(sequence, window_size):
    x, y = list(), list()
    for i in range(len(sequence)):
    # find the end of this pattern
        end_ix = i + window_size
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
    # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        x.append(seq_x)
        y.append(seq_y)
    return np.array(x), np.array(y)


## Hyper parameter

In [362]:
split_ratio = 0.70
num_epochs = 60


#seed
torch.manual_seed(123)

<torch._C.Generator at 0x201b9e1d590>

# Data Preparation

In [363]:
#Synthetic Data
univariate_series = np.array([x for x in range(0, 400, 10)])
print(univariate_series.shape)
univariate_series

(40,)


array([  0,  10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120,
       130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250,
       260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380,
       390])

## Data Splitting

In [364]:
split_data = round(len(univariate_series)*split_ratio)
split_data

28

In [365]:
#split data by indexing 
train_data = univariate_series[:split_data]
test_data = univariate_series[split_data:]
print("train_data_shape")
print(train_data.shape)
print("test_data_shape")
print(test_data.shape)

train_data_shape
(28,)
test_data_shape
(12,)


## Data Normalization

In [366]:
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data.reshape(-1, 1))

test_data_normalized = scaler.fit_transform(test_data.reshape(-1, 1))

## Data Sequencing

In [367]:
trainX ,trainY =  univariate_single_step(train_data_normalized,3)
testX , testY = univariate_single_step(test_data_normalized,3)
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testX shape:{testX.shape}")


trainX shape:(25, 3, 1) trainY shape:(25, 1)

testX shape:(9, 3, 1) testX shape:(9, 3, 1)


## Data Transform

In [368]:
#transfrom to Pytorch tensor
trainX = torch.as_tensor(trainX).float()
trainY = torch.as_tensor(trainY).float()
testX = torch.as_tensor(testX).float()
testY = torch.as_tensor(testY).float()
print(f"trainX shape:{trainX.shape} trainY shape:{trainY.shape}\n")
print(f"testX shape:{testX.shape} testX shape:{testX.shape}")

trainX shape:torch.Size([25, 3, 1]) trainY shape:torch.Size([25, 1])

testX shape:torch.Size([9, 3, 1]) testX shape:torch.Size([9, 3, 1])


In [369]:
print(f"Features are now in the shape of {trainX.shape} while labels are now in the shape of {trainY.shape}\n")
print("x-feature\n"+str(trainX.shape[0])+" = total number of data ")
print(str(trainX.shape[1])+" = window size ")
print(str(trainX.shape[2])+" = number of time series\n")
print("y-label\n"+str(trainY.shape[0])+" = number of data")
print(str(trainY.shape[1])+" = number of step\n")

Features are now in the shape of torch.Size([25, 3, 1]) while labels are now in the shape of torch.Size([25, 1])

x-feature
25 = total number of data 
3 = window size 
1 = number of time series

y-label
25 = number of data
1 = number of step



# LSTM Model Configuration 

## Vanila LSTM

In [370]:
class LSTM(nn.Module):

        def __init__(self, n_feature, hidden_dim, num_layers, output_dim):
            super(LSTM, self).__init__()

            self.n_feature = n_feature
            # Hidden dimensions
            self.hidden_dim = hidden_dim

            # Number of hidden layers
            self.num_layers = num_layers

            # Building your LSTM
            # batch_first=True causes input/output tensors to be of shape
            # (batch_dim, seq_dim, feature_dim)
            self.lstm = nn.LSTM(n_feature, hidden_dim, num_layers, batch_first=True)

            # Readout layer
            self.fc = nn.Linear(hidden_dim, output_dim)


        def forward(self, x):
            # Initialize hidden state with zeros
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

            # Initialize cell state
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()

            # One time step
            # We need to detach as we are doing truncated backpropagation through time (BPTT)
            # If we don't, we'll backprop all the way to the start even after going through another batch
            out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

            # Index hidden state of last time step
            # out.size() --> 100, 28, 100
            # out[:, -1, :] --> 100, 100 --> just want last time step hidden states!
            out = self.fc(out[:, -1, :])
            # out.size() --> 100, 10
            return out

## Bidirectional LSTM

In [429]:
class BidirectionalLSTM(nn.Module):

    def __init__(self, n_feature, hidden_dim, num_layers, output_dim):
        super(BidirectionalLSTM, self).__init__()

        self.n_feature = n_feature
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.num_layers = num_layers

        # Building your LSTM
        # batch_first=True causes input/output tensors to be of shape
        # (batch_dim, seq_dim, feature_dim)
        self.lstm = nn.LSTM(n_feature, hidden_dim, num_layers, batch_first=True,bidirectional=True)

        # Readout layer *2 for bidirectional LSTM
        self.fc = nn.Linear(hidden_dim*2, output_dim)

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_dim).requires_grad_()

        # Initialize cell state
        c0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_dim).requires_grad_()

        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))

        # Index hidden state of last time step
        out = self.fc(out[:, -1, :])
        
        return out

## Input Model

In [485]:
#Arguments for LSTM model
hidden_dim = 10
number_of_time_series = 1 
timestep = 1
output_dim =1 

#1 for vanila LSTM , >1 is mean stacked LSTM
num_layers = 3 

#Vanila , Stacked LSTM
# model = LSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)

#Bidirectional LSTM
model = BidirectionalLSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)


In [478]:
#loss function 
loss_fn = torch.nn.MSELoss()

#optimiser
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

## Training

In [479]:
for t in range(num_epochs):
    # Initialise hidden state
#     Don't do this if you want your LSTM to be stateful
#     model.hidden = model.init_hidden()

    # Forward pass
    y_train_pred = model(trainX)
    # print("after transform y_train_pred.shape"+str(y_train_pred.shape))

    loss = loss_fn(y_train_pred, trainY)
    print("Epoch ", t, "MSE: ", loss.item())

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

Epoch  0 MSE:  0.345790296792984
Epoch  1 MSE:  0.3182530701160431
Epoch  2 MSE:  0.29817891120910645
Epoch  3 MSE:  0.28501227498054504
Epoch  4 MSE:  0.2792040705680847
Epoch  5 MSE:  0.27944812178611755
Epoch  6 MSE:  0.2782609462738037
Epoch  7 MSE:  0.27061522006988525
Epoch  8 MSE:  0.2579366862773895
Epoch  9 MSE:  0.2427166998386383
Epoch  10 MSE:  0.22584863007068634
Epoch  11 MSE:  0.20653007924556732
Epoch  12 MSE:  0.18197958171367645
Epoch  13 MSE:  0.14810732007026672
Epoch  14 MSE:  0.10541140288114548
Epoch  15 MSE:  0.06541479378938675
Epoch  16 MSE:  0.045050300657749176
Epoch  17 MSE:  0.0393492691218853
Epoch  18 MSE:  0.03449425846338272
Epoch  19 MSE:  0.036943886429071426
Epoch  20 MSE:  0.05139799043536186
Epoch  21 MSE:  0.059045687317848206
Epoch  22 MSE:  0.0520176887512207
Epoch  23 MSE:  0.03793324530124664
Epoch  24 MSE:  0.025310097262263298
Epoch  25 MSE:  0.018119577318429947
Epoch  26 MSE:  0.016170918941497803
Epoch  27 MSE:  0.017468174919486046
Epoc

## Prediction

In [480]:
# make predictions
y_test_pred = model(testX)

#Reshape to original data
y_train_pred = torch.reshape(y_train_pred,(y_train_pred.shape[0],y_train_pred.shape[1]))
trainY = torch.reshape(trainY,(trainY.shape[0],trainY.shape[1]))
y_test_pred = torch.reshape(y_test_pred,(y_test_pred.shape[0],y_test_pred.shape[1]))
testY = torch.reshape(testY,(testY.shape[0],testY.shape[1]))

In [481]:
#Invert predictions
y_train_pred = scaler.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler.inverse_transform(trainY.detach().numpy())
y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler.inverse_transform(testY.detach().numpy())

In [482]:
print("y-test\t\ty-predict")
for i in range(len(y_test_pred)):
    print(f"{y_test[i]}\t\t{y_test_pred[i]}")

y-test		y-predict
[522.1]		[507.70514]
[537.7]		[518.5679]
[553.3]		[532.7378]
[568.9]		[549.4688]
[584.5]		[567.1227]
[600.10004]		[584.54034]
[615.7]		[601.3206]
[631.3]		[616.9207]
[646.9]		[630.35736]


In [483]:
print(f"y_test_shape : {y_test.shape}")
print(f"y_test_pred_shape : {y_test_pred.shape}")

y_test_shape : (9, 1)
y_test_pred_shape : (9, 1)


In [484]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 3.44 RMSE
Test Score: 17.01 RMSE


# Exercise for Univariate (Solution)

Using LSTM to create a model that can predict lastest sales of shampoo

In [528]:
#Hyperparameter
num_epochs_shampoo = 100
split_ratio = 0.70

#Hidden Layer for LSTM
hidden_dim = 32

#seed
torch.manual_seed(123)

<torch._C.Generator at 0x201b9e1d590>

In [513]:
shampoo = pd.read_csv('../datasets/others/shampoo-sales.csv')
shampoo_ts =shampoo['sales']
shampoo_ts.head() 
                            

0    266.0
1    145.9
2    183.1
3    119.3
4    180.3
Name: sales, dtype: float64

In [514]:
#split data by indexing 
split_data = round(len(shampoo_ts)*split_ratio)
split_data

25

In [515]:
train_data_shampoo = shampoo_ts[:split_data]
test_data_shampoo = shampoo_ts[split_data:]
print("train_data_shampoo_shape")
print(train_data_shampoo.shape)
print("test_data_shampoo_shape")
print(test_data_shampoo.shape)

train_data_shampoo_shape
(25,)
test_data_shampoo_shape
(11,)


In [516]:
#Data Normalization

#Reshape before normalize
train_data_shampoo = train_data_shampoo.values.reshape(-1, 1)
test_data_shampoo = test_data_shampoo.values.reshape((-1, 1))

#Build Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized_shampoo = scaler.fit_transform(train_data_shampoo)

test_data_normalized_shampoo = scaler.fit_transform(test_data_shampoo)
train_data_normalized_shampoo[1:6]

array([[-0.82401588],
       [-0.57790275],
       [-1.        ],
       [-0.59642739],
       [-0.67449553]])

In [517]:
#Data Sequencing 
trainX_shampoo ,trainY_shampoo =  univariate_single_step(train_data_normalized_shampoo,2)
testX_shampoo , testY_shampoo = univariate_single_step(test_data_normalized_shampoo,2)
print(f"trainX shape:{trainX_shampoo.shape} trainY shape:{trainY_shampoo.shape}\n")
print(f"testX shape:{testX_shampoo.shape} testX shape:{testY_shampoo.shape}")

trainX shape:(23, 2, 1) trainY shape:(23, 1)

testX shape:(9, 2, 1) testX shape:(9, 1)


In [518]:
#Transfrom numpy to Pytorch tensor
trainX_shampoo = torch.as_tensor(trainX_shampoo).float()
trainY_shampoo = torch.as_tensor(trainY_shampoo).float()
testX_shampoo = torch.as_tensor(testX_shampoo).float()
testY_shampoo = torch.as_tensor(testY_shampoo).float()
print(f"trainX shape:{trainX_shampoo.shape} trainY shape:{trainY_shampoo.shape}\n")
print(f"testX shape:{testX_shampoo.shape} testX shape:{testY_shampoo.shape}")


trainX shape:torch.Size([23, 2, 1]) trainY shape:torch.Size([23, 1])

testX shape:torch.Size([9, 2, 1]) testX shape:torch.Size([9, 1])


In [536]:
#Arguments for LSTM model
number_of_time_series = 1 
timestep = 1
output_dim =1 
#1 for vanila LSTM , >1 is mean stacked LSTM
num_layers = 1

#Vanila ,Stacked LSTM
# model_shampoo = LSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)

#Bidirectional LSTM
model_shampoo = BidirectionalLSTM(n_feature=number_of_time_series, hidden_dim=hidden_dim, output_dim=timestep, num_layers=num_layers)
model_shampoo.float()


BidirectionalLSTM(
  (lstm): LSTM(1, 32, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [537]:
loss_fn_shampoo = torch.nn.MSELoss()

optimiser_shampoo = torch.optim.Adam(model_shampoo.parameters(), lr=0.01)

In [538]:
for t in range(num_epochs_shampoo):
    # Initialise hidden state
    # Don't do this if you want your LSTM to be stateful
    # model.hidden = model.init_hidden()

    # Forward pass
    y_train_pred_shampoo = model_shampoo(trainX_shampoo)
    loss_shampoo = loss_fn_shampoo(y_train_pred_shampoo, trainY_shampoo)
    print("Epoch ", t, "MSE: ", loss_shampoo.item())

    # Zero out gradient, else they will accumulate between epochs
    optimiser_shampoo.zero_grad()

    # Backward pass
    loss_shampoo.backward()

    # Update parameters
    optimiser_shampoo.step()

Epoch  0 MSE:  0.2840895652770996
Epoch  1 MSE:  0.26358506083488464
Epoch  2 MSE:  0.2513701319694519
Epoch  3 MSE:  0.2455216944217682
Epoch  4 MSE:  0.24335384368896484
Epoch  5 MSE:  0.240908682346344
Epoch  6 MSE:  0.23643222451210022
Epoch  7 MSE:  0.2305239588022232
Epoch  8 MSE:  0.22434689104557037
Epoch  9 MSE:  0.2187640517950058
Epoch  10 MSE:  0.21409872174263
Epoch  11 MSE:  0.21003389358520508
Epoch  12 MSE:  0.20569400489330292
Epoch  13 MSE:  0.20033761858940125
Epoch  14 MSE:  0.19427035748958588
Epoch  15 MSE:  0.18886412680149078
Epoch  16 MSE:  0.1854628473520279
Epoch  17 MSE:  0.1835416704416275
Epoch  18 MSE:  0.18142260611057281
Epoch  19 MSE:  0.17933984100818634
Epoch  20 MSE:  0.17924568057060242
Epoch  21 MSE:  0.18108268082141876
Epoch  22 MSE:  0.1816246062517166
Epoch  23 MSE:  0.18050965666770935
Epoch  24 MSE:  0.1792011260986328
Epoch  25 MSE:  0.17752191424369812
Epoch  26 MSE:  0.17484575510025024
Epoch  27 MSE:  0.1721193939447403
Epoch  28 MSE:  0

In [539]:
#Make predictions
y_test_pred_shampoo = model_shampoo(testX_shampoo)

#Reshape to original data
y_train_pred_shampoo = torch.reshape(y_train_pred_shampoo,(y_train_pred_shampoo.shape[0],y_train_pred_shampoo.shape[1]))
trainY_shampoo = torch.reshape(trainY_shampoo,(trainY_shampoo.shape[0],trainY_shampoo.shape[1]))
y_test_pred_shampoo = torch.reshape(y_test_pred_shampoo,(y_test_pred_shampoo.shape[0],y_test_pred_shampoo.shape[1]))
testY_shampoo = torch.reshape(testY_shampoo,(testY_shampoo.shape[0],testY_shampoo.shape[1]))

In [540]:
#Invert predictions
y_train_pred_shampoo = scaler.inverse_transform(y_train_pred_shampoo.detach().numpy())
y_train_shampoo = scaler.inverse_transform(trainY_shampoo.detach().numpy())
y_test_pred_shampoo = scaler.inverse_transform(y_test_pred_shampoo.detach().numpy())
y_test_shampoo = scaler.inverse_transform(testY_shampoo.detach().numpy())

In [541]:
y_test_shampoo.shape

(9, 1)

In [542]:
print("y-test\t\ty-predict")
for i in range(len(y_test_shampoo)):
    print(f"{y_test_shampoo[i]}\t\t{y_test_pred_shampoo[i]}")

y-test		y-predict
[439.3]		[424.71152]
[401.30002]		[393.35703]
[437.4]		[437.54062]
[575.5]		[425.14606]
[407.60004]		[481.2224]
[682.]		[496.14194]
[475.3]		[464.71143]
[581.3]		[566.05054]
[646.9]		[524.4608]


In [543]:
print(f"y_test_shape : {y_test_shampoo.shape}")   
print(f"y_test_pred_shape : {y_test_pred_shampoo.shape}")

y_test_shape : (9, 1)
y_test_pred_shape : (9, 1)


In [544]:
#calculate root mean squared error
trainScore_shampoo = math.sqrt(mean_squared_error(y_train_shampoo[:,0], y_train_pred_shampoo[:,0]))
print('Train Score: %.2f RMSE' % (trainScore_shampoo))
testScore_shampoo = math.sqrt(mean_squared_error(y_test_shampoo[:,0], y_test_pred_shampoo[:,0]))
print('Test Score: %.2f RMSE' % (testScore_shampoo))

Train Score: 70.49 RMSE
Test Score: 93.20 RMSE
