Univariate LSTM Models


1- Vanilla LSTM

2- Stacked LSTM

3- Bidirectional LSTM

4- CNN LSTM

5- ConvLSTM

In [None]:
# univariate stacked lstm example
import pandas as pd
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Bidirectional
from keras.layers import RepeatVector

#for CNN and Conv LSTM
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import ConvLSTM2D

In [None]:
#@title
# Detect whether the notebook runs on Google Colab: the `google.colab`
# package is only importable there.
try:
  from google.colab import drive
  IN_COLAB=True
except ImportError:
  # Catch only the "package not available" case; a bare `except:` would also
  # hide KeyboardInterrupt/SystemExit and unrelated failures.
  IN_COLAB=False

if IN_COLAB:
  print("We're running Colab")

#@title
if IN_COLAB:
  # Mount the Google Drive at mount
  mount='/content/gdrive'
  print("Colab: mounting Google drive on ", mount)

  drive.mount(mount)

  # Switch to the directory on the Google Drive that you want to use
  import os
  drive_root = mount + "/My Drive/Colab Notebooks/forecasting/"
  
  # Create drive_root if it doesn't exist
  create_drive_root = True
  if create_drive_root:
    print("\nColab: making sure ", drive_root, " exists.")
    os.makedirs(drive_root, exist_ok=True)
  
  # Change to the directory
  print("\nColab: Changing directory to ", drive_root)
  %cd $drive_root

We're running Colab
Colab: mounting Google drive on  /content/gdrive
Mounted at /content/gdrive

Colab: making sure  /content/gdrive/My Drive/Colab Notebooks/forecasting/  exists.

Colab: Changing directory to  /content/gdrive/My Drive/Colab Notebooks/forecasting/
/content/gdrive/My Drive/Colab Notebooks/forecasting


In [None]:
# Load the series from data.csv: the first column is parsed as dates and used
# as the index. `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and
# removed in pandas 2.0; `.squeeze("columns")` on the loaded frame is the
# documented replacement (a single-column frame becomes a Series).
series = pd.read_csv('data.csv', header=0, parse_dates=[0], index_col=0).squeeze("columns")
df = pd.DataFrame(series)
print(df)


            Nurtec Rx
date_week            
2020-09-04   4108.538
2020-09-11   3200.286
2020-09-18   3966.593
2020-09-25   3935.653
2020-10-02   4093.173
...               ...
2022-07-29   6780.315
2022-08-05   6878.218
2022-08-12   7069.195
2022-08-19   7132.550
2022-08-26   7437.977

[104 rows x 1 columns]


In [None]:

# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into supervised-learning samples.

    Each sample pairs ``n_steps`` consecutive observations with the single
    observation that immediately follows them.

    Args:
        sequence: 1-D sequence of observations (list, NumPy array or pandas
            Series).
        n_steps: number of consecutive observations in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds one input window per row
        and ``y`` the value following each window.
    """
    # Index a plain NumPy array so that lookups are always positional.
    # ``series[end_ix]`` on a pandas Series is label-based: it fails (or picks
    # the wrong row) for an integer index that does not start at 0, and its
    # positional fallback for other index types is deprecated.
    values = array(sequence)
    X, y = list(), list()
    for i in range(len(values)):
        # position of the target value, i.e. one past the end of the window
        end_ix = i + n_steps
        # stop once the target would fall beyond the sequence
        if end_ix > len(values) - 1:
            break
        X.append(values[i:end_ix])
        y.append(values[end_ix])
    return array(X), array(y)

In [None]:
# Settings shared by the single-step univariate models
n_steps = 3        # length of each input window
n_features = 1     # one value per time step (univariate)
epochs = 200

# Series to forecast
raw_seq = df['Nurtec Rx']

# Window the series into (input window, next value) samples
X, y = split_sequence(raw_seq, n_steps)

# Add the trailing feature axis: [samples, timesteps] -> [samples, timesteps, features]
X = X.reshape(len(X), X.shape[1], n_features)

In [None]:
# Most recent `n_steps` observations, shown here as a flat array
df1 = df.iloc[-n_steps:, :]
x_input = array(df1['Nurtec Rx'])
x_input

array([7069.195, 7132.55 , 7437.977])

**Vanilla LSTM**

In [None]:


# One LSTM layer feeding a single-unit dense head (one-step-ahead forecast)
vanila_model = Sequential([
    LSTM(50, activation='relu', input_shape=(n_steps, n_features)),
    Dense(1),
])
vanila_model.compile(optimizer='adam', loss='mse')

# Train silently on the windowed samples
vanila_model.fit(X, y, epochs=epochs, verbose=0)

# Forecast the next value from the last `n_steps` observations
# (the same procedure is used for the Vanilla, Stacked and Bidirectional models)
df1 = df.iloc[-n_steps:, :]
x_input = array(df1['Nurtec Rx']).reshape((1, n_steps, n_features))
yhat = vanila_model.predict(x_input, verbose=0)
print(yhat)

[[7273.626]]


**Stacked LSTM**

In [None]:
# Two LSTM layers stacked: the first returns its full output sequence so the
# second one receives one vector per time step
seq_model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)),
    LSTM(50, activation='relu'),
    Dense(1),
])
seq_model.compile(optimizer='adam', loss='mse')

# Train silently on the windowed samples
seq_model.fit(X, y, epochs=epochs, verbose=0)

# Forecast the next value from the last `n_steps` observations
# (the same procedure is used for the Vanilla, Stacked and Bidirectional models)
df1 = df.iloc[-n_steps:, :]
x_input = array(df1['Nurtec Rx']).reshape((1, n_steps, n_features))
yhat = seq_model.predict(x_input, verbose=0)
print(yhat)


[[7134.1636]]


**Bidirectional LSTM**

In [None]:
# LSTM wrapped in Bidirectional, followed by a single-unit dense head
bi_model = Sequential([
    Bidirectional(LSTM(50, activation='relu'), input_shape=(n_steps, n_features)),
    Dense(1),
])
bi_model.compile(optimizer='adam', loss='mse')

# Train silently on the windowed samples
bi_model.fit(X, y, epochs=epochs, verbose=0)

# Forecast the next value from the last `n_steps` observations
# (the same procedure is used for the Vanilla, Stacked and Bidirectional models)
df1 = df.iloc[-n_steps:, :]
x_input = array(df1['Nurtec Rx']).reshape((1, n_steps, n_features))
yhat = bi_model.predict(x_input, verbose=0)
print(yhat)



[[7232.447]]


**CNN LSTM**

In [None]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into supervised-learning samples.

    Each sample pairs ``n_steps`` consecutive observations with the single
    observation that immediately follows them.

    Args:
        sequence: 1-D sequence of observations (list, NumPy array or pandas
            Series).
        n_steps: number of consecutive observations in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds one input window per row
        and ``y`` the value following each window.
    """
    # Index a plain NumPy array so that lookups are always positional.
    # ``series[end_ix]`` on a pandas Series is label-based: it fails (or picks
    # the wrong row) for an integer index that does not start at 0, and its
    # positional fallback for other index types is deprecated.
    values = array(sequence)
    X, y = list(), list()
    for i in range(len(values)):
        # position of the target value, i.e. one past the end of the window
        end_ix = i + n_steps
        # stop once the target would fall beyond the sequence
        if end_ix > len(values) - 1:
            break
        X.append(values[i:end_ix])
        y.append(values[end_ix])
    return array(X), array(y)
 

In [None]:
# Series to forecast
raw_seq = df['Nurtec Rx']

# Window the series into samples of 4 consecutive observations
n_steps = 4
X, y = split_sequence(raw_seq, n_steps)

# View every 4-step window as 2 subsequences of 2 steps each:
# [samples, timesteps] -> [samples, subsequences, timesteps, features].
# NOTE: n_steps is re-bound here to the per-subsequence length.
n_features, n_seq, n_steps = 1, 2, 2
epochs = 500
X = X.reshape((len(X), n_seq, n_steps, n_features))

In [None]:
# Conv1D -> max-pool -> flatten applied to every subsequence via
# TimeDistributed; the resulting sequence is fed to an LSTM and a dense head
cnn_model = Sequential([
    TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(None, n_steps, n_features)),
    TimeDistributed(MaxPooling1D(pool_size=2)),
    TimeDistributed(Flatten()),
    LSTM(50, activation='relu'),
    Dense(1),
])
cnn_model.compile(optimizer='adam', loss='mse')

# Train silently; the History object returned by fit is the cell's output
cnn_model.fit(X, y, epochs=epochs, verbose=0)

<keras.callbacks.History at 0x7f18c9a65b10>

In [None]:
# Demonstrate prediction with the CNN-LSTM model.
# The input must hold exactly n_seq * n_steps observations to fit the
# (1, n_seq, n_steps, n_features) reshape below, so the window length is
# derived from those settings instead of being hard-coded as 4.
df1 = df.iloc[-(n_seq * n_steps): , :]
x_input = df1['Nurtec Rx']
x_input = array(x_input )

x_input = x_input.reshape((1, n_seq, n_steps, n_features))

yhat = cnn_model.predict(x_input, verbose=0)
print(yhat)



[[7029.7334]]


**Conv LSTM**

The layer expects input as a sequence of two-dimensional images, therefore the shape of input data must be:

[samples, timesteps, rows, columns, features]

In [None]:
# Series to forecast
raw_seq = df['Nurtec Rx']

# Window the series into samples of 4 consecutive observations
n_steps = 4
X, y = split_sequence(raw_seq, n_steps)

# Lay every 4-step window out as 2 "images" of 1 row x 2 columns:
# [samples, timesteps] -> [samples, timesteps, rows, columns, features].
# NOTE: n_steps is re-bound here to the number of columns per image.
n_features, n_seq, n_steps = 1, 2, 2
epochs = 500
X = X.reshape((len(X), n_seq, 1, n_steps, n_features))

In [None]:
# ConvLSTM2D layer followed by a flatten and a single-unit dense head
model = Sequential()
model.add(ConvLSTM2D(filters=64, kernel_size=(1,2), activation='relu', input_shape=(n_seq, 1, n_steps, n_features)))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=epochs, verbose=0)

# Build the forecasting input from the data inside this cell. Previously the
# cell reshaped whatever `x_input` was left over from the CNN-LSTM prediction
# cell, so it only worked when that cell had been run immediately before.
x_input = array(df['Nurtec Rx'].iloc[-(n_seq * n_steps):])
x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[7143.6904]]


In [None]:
# Demonstrate prediction with the ConvLSTM model.
# The input must hold exactly n_seq * n_steps observations to fit the
# (1, n_seq, 1, n_steps, n_features) reshape below, so the window length is
# derived from those settings instead of being hard-coded as 4.
df1 = df.iloc[-(n_seq * n_steps): , :]
x_input = df1['Nurtec Rx']
x_input = array(x_input )

x_input = x_input.reshape((1, n_seq, 1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[7143.6904]]


**Multi-Step LSTM Models**

A time series forecasting problem that requires a prediction of multiple time steps into the future can be referred to as multi-step time series forecasting.

Specifically, these are problems where the forecast horizon or interval is more than one time step.

There are two main types of LSTM models that can be used for multi-step forecasting; they are:

1- Vector Output Model

2- Encoder-Decoder Model

Before we look at these models, let’s first look at the preparation of data for multi-step forecasting.

In [None]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps_in, n_steps_out=None):
    """Split a univariate sequence into input windows and forecast targets.

    This definition replaces the earlier two-argument ``split_sequence`` in
    the notebook namespace. ``n_steps_out`` is therefore optional: when it is
    omitted the function behaves like the single-step version, so the earlier
    cells still work if they are re-run after this one.

    Args:
        sequence: 1-D sequence of observations (list, NumPy array or pandas
            Series).
        n_steps_in: number of consecutive observations in each input window.
        n_steps_out: number of following observations to use as the target.
            ``None`` (default) selects single-step mode, where each target is
            one scalar value rather than a length-1 window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` holds one input window per
        row. ``y`` holds one row of ``n_steps_out`` values per sample, or one
        scalar per sample in single-step mode.
    """
    # Positional indexing on a plain array, independent of any pandas index
    values = array(sequence)
    single_step = n_steps_out is None
    horizon = 1 if single_step else n_steps_out

    X, y = list(), list()
    for i in range(len(values)):
        # end of the input window / end of the target span
        end_ix = i + n_steps_in
        out_end_ix = end_ix + horizon
        # stop once the target span would run past the sequence
        if out_end_ix > len(values):
            break
        X.append(values[i:end_ix])
        if single_step:
            y.append(values[end_ix])
        else:
            y.append(values[end_ix:out_end_ix])
    return array(X), array(y)

# Window sizes: 3 observations in, 2 observations out
n_steps_in, n_steps_out = 3, 2

# Series to forecast
raw_seq = df['Nurtec Rx']

# Build the (input window, multi-step target) samples
X, y = split_sequence(raw_seq, n_steps_in, n_steps_out)

# To inspect the samples, uncomment:
# for i in range(len(X)):
# 	print(X[i], y[i])

**Vector Output Model**

The LSTM expects data to have a three-dimensional structure of 

[samples, timesteps, features]

In [None]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))

# Two stacked LSTM layers; the dense head emits all `n_steps_out` forecast
# values at once
vector_model = Sequential()
vector_model.add(LSTM(100, activation='relu', return_sequences=True, input_shape=(n_steps_in, n_features)))
vector_model.add(LSTM(100, activation='relu'))
vector_model.add(Dense(n_steps_out))
vector_model.compile(optimizer='adam', loss='mse')

# fit model
vector_model.fit(X, y, epochs=50, verbose=0)

# Demonstrate prediction. The input window is tied to n_steps_in (instead of a
# literal 3) so it always matches the (1, n_steps_in, n_features) reshape.
df1 = df.iloc[-n_steps_in: , :]
x_input = df1['Nurtec Rx']
x_input = array(x_input )

x_input = x_input.reshape((1, n_steps_in, n_features))
yhat = vector_model.predict(x_input, verbose=0)
print(yhat)

[[7269.6196 7357.276 ]]


**Encoder-Decoder Model**

A model specifically developed for forecasting variable length output sequences is called the Encoder-Decoder LSTM.

The model was designed for prediction problems where there are both input and output sequences, so-called sequence-to-sequence, or seq2seq problems, such as translating text from one language to another.

This model can be used for multi-step time series forecasting.

In [None]:
# Window sizes: 3 observations in, 2 observations out
n_steps_in, n_steps_out = 3, 2
n_features = 1

# Series to forecast
raw_seq = df['Nurtec Rx']

# Build the (input window, multi-step target) samples
X, y = split_sequence(raw_seq, n_steps_in, n_steps_out)

# Both inputs and targets get a trailing feature axis:
# [samples, timesteps] -> [samples, timesteps, features]
X = X.reshape(len(X), X.shape[1], n_features)
y = y.reshape(len(y), y.shape[1], n_features)

In [None]:
# Encoder LSTM -> RepeatVector(n_steps_out) -> decoder LSTM returning a
# sequence -> one dense unit applied at every output step
encoder_model = Sequential([
    LSTM(100, activation='relu', input_shape=(n_steps_in, n_features)),
    RepeatVector(n_steps_out),
    LSTM(100, activation='relu', return_sequences=True),
    TimeDistributed(Dense(1)),
])
encoder_model.compile(optimizer='adam', loss='mse')

# Train silently; the History object returned by fit is the cell's output
encoder_model.fit(X, y, epochs=100, verbose=0)


<keras.callbacks.History at 0x7f18c7546690>

In [None]:

# Demonstrate prediction with the encoder-decoder model.
# The input window is tied to n_steps_in (instead of a literal 3) so it always
# matches the (1, n_steps_in, n_features) reshape below.

df1 = df.iloc[-n_steps_in: , :]
x_input = df1['Nurtec Rx']
x_input = array(x_input )

x_input = x_input.reshape((1, n_steps_in, n_features))
yhat = encoder_model.predict(x_input, verbose=0)
print(yhat)

[[[7254.747]
  [7371.073]]]
