In [1]:
# This code is inspired by https://www.kaggle.com/code/kmkarakaya/lstm-output-types-return-sequences-state

In [2]:
from tensorflow.keras import models
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Input
from tensorflow.keras import Input

# 1. LSTM Default return value:

In [3]:
# Output is only the hidden state at the last time step.

# This is because the return_sequences and return_state parameters keep their default value (False).

# The output is a 2D array of real numbers.

# The first dimension is indicating the number of samples in the batch given to the LSTM layer

# The second dimension is the dimensionality of the output space defined by the units parameter in Keras LSTM implementation.

# Example Code

In [4]:
# Since, in the following examples, the LSTM unit parameter (dimensionality of the output space) is set to 5, 
# the last hidden state will have a dimension of 5.

# Therefore, the Output Shape becomes (None, 5) and the output is a tensor of 5 real numbers for each sample in the batch.
# None is a placeholder for the batch size.

In [7]:
# Dimensions shared by all examples in this notebook.
Batchsize = 1   # N: samples per batch
timesteps = 3   # T: time steps per sequence
features = 4    # D: features per time step
LSTMunits = 5   # hidden size (units passed to the LSTM layer)

# Random input batch drawn from a standard normal, laid out as N x T x D.
X = np.random.standard_normal(size=(Batchsize, timesteps, features))

In [8]:
# Default LSTM (return_sequences and return_state left unset): the layer's
# single output has shape (None, LSTMunits), as the summary below shows.
input = Input(shape=(timesteps, features))
hidden = LSTM(units=LSTMunits)(input)
model1 = Model(inputs=input, outputs=hidden)
model1.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 3, 4)]            0         
                                                                 
 lstm (LSTM)                 (None, 5)                 200       
                                                                 
Total params: 200
Trainable params: 200
Non-trainable params: 0
_________________________________________________________________


In [9]:
# With the default LSTM settings the prediction holds only the hidden state
# of the last time step, i.e. one row of LSTMunits values per sample.
output = model1.predict(X)

print('input shape:  ', X.shape)
# A single-space line before and after the values, all in one call.
print(" ", output, " ", sep="\n")
print('output shape: ', output.shape)  # Batchsize x Hiddensize

input shape:   (1, 3, 4)
 
[[ 0.30890232 -0.01780718  0.3366223  -0.07549541  0.05742521]]
 
output shape:  (1, 5)


# 2. LSTM return_sequences = True 

In [10]:
# When the return_sequences parameter is True, the layer outputs the hidden state of every time step.

# The output is a 3D array of real numbers.

# The first dimension is indicating the number of samples in the batch given to the LSTM layer

# The second dimension is the number of time steps in the input sequence. By indexing second dimension you can access all 
# the hidden states of the units at a given time step

# The third dimension is the dimensionality of the output space defined by the units parameter in Keras LSTM implementation.

# The array contains the hidden state of the LSTM layer at every time step.

# Example Code

In [11]:
# Since we have 3 time steps and unit (dimensionality of the output space) is set to 5, the output shape will be (None, 3, 5).

# Because LSTM returns 1 hidden state for each time step.

In [12]:
# return_sequences=True: the layer emits one hidden state per time step,
# so its output has shape (None, timesteps, LSTMunits).
input = Input(shape=(timesteps, features))
all_hidden_states = LSTM(units=LSTMunits, return_sequences=True)(input)
model2 = Model(inputs=input, outputs=all_hidden_states)
model2.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 3, 4)]            0         
                                                                 
 lstm_1 (LSTM)               (None, 3, 5)              200       
                                                                 
Total params: 200
Trainable params: 200
Non-trainable params: 0
_________________________________________________________________


In [13]:
# The prediction is indexed as [sample, time step, unit].
output = model2.predict(X)

print('input shape:  ', X.shape)
print('all_hidden_states shape: ', output.shape)
# The batch holds a single sample, so index 0 is the only one available.
print('\nhidden states for the first sample: \n', output[0])
# Index 1 on the time axis selects the second time step.
print('\nhidden states for the first sample at the second time step: \n', output[0, 1])

input shape:   (1, 3, 4)
all_hidden_states shape:  (1, 3, 5)

hidden states for the first sample: 
 [[ 0.03259256 -0.01499919  0.16842824 -0.10798944 -0.00736986]
 [ 0.06433664 -0.14843813  0.0860649  -0.13666266 -0.06487611]
 [ 0.1681296   0.02044299  0.14556997 -0.3427916   0.08813762]]

hidden states for the first sample at the second time step: 
 [ 0.06433664 -0.14843813  0.0860649  -0.13666266 -0.06487611]


# 3. LSTM return_state=True

In [14]:
# When the return_state parameter is True, the LSTM layer outputs the last hidden state twice
# and the last cell state.

# The output is three 2D arrays of real numbers.

# The first dimension is indicating the number of samples (batch size) given to the LSTM layer
# The second dimension is the dimensionality of the output space defined by unit parameter in the Keras LSTM layer.

# It returns 3 arrays in the result:

# 1. The LSTM hidden state of the last time step: (None, 5) It is 5 because dimensionality of the output space (unit parameter)
#     is set to 5.

# 2. The LSTM hidden state of the last time step (again): (None, 5)

# 3. The LSTM cell state of the last time step: (None, 5) refers last cell state value whose dimensionality of the output space
#     (unit parameter) is set to 5.

# Example Code

In [15]:
# Since we set the unit parameter (dimensionality of the output space) to 5, the output shape will be (None, 5) for all 3 tensors.

In [16]:
# return_state=True: the layer call yields three tensors, unpacked here as
# the regular output, the last hidden state and the last cell state.
input = Input(shape=(timesteps, features))
output, hidden, cell = LSTM(units=LSTMunits, return_state=True)(input)
model3 = Model(inputs=input, outputs=[output, hidden, cell])
model3.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 3, 4)]            0         
                                                                 
 lstm_2 (LSTM)               [(None, 5),               200       
                              (None, 5),                         
                              (None, 5)]                         
                                                                 
Total params: 200
Trainable params: 200
Non-trainable params: 0
_________________________________________________________________


In [18]:
# model3 has three outputs; stack the predictions into one array so the
# combined shape can be printed, then split them back along the first axis.
results = np.array(model3.predict(X))
output, hidden_state, cell_state = results

print('input shape:  ', X.shape)
print('result is three 2D-array: ', results.shape)
print('\nLSTM_output is in the first array: ', output.shape)
print('\nLSTM_output : ', output)
print('\nhidden_state which is exactly the same with LSTM_output is in the second array: ', hidden_state.shape)
print('\nHidden state: ', hidden_state)
print('\nCell state: ', cell_state)

input shape:   (1, 3, 4)
result is three 2D-array:  (3, 1, 5)

LSTM_output is in the first array:  (1, 5)

LSTM_output :  [[-0.20883691  0.03200638  0.15348771 -0.15001658  0.2003983 ]]

hidden_state which is exactly the same with LSTM_output is in the second array:  (1, 5)

Hidden state:  [[-0.20883691  0.03200638  0.15348771 -0.15001658  0.2003983 ]]

Cell state:  [[-0.62054557  0.08804745  0.1777096  -0.5262824   0.26693702]]


# 4. LSTM return_state = True   and    return_sequences = True 

In [19]:
# LSTM layer returns 3 results:

# (as return_sequences=True)
# the hidden states for each input time step,

# (as return_state=True)
# the hidden state output for the last time step and
# the cell state for the last time step.

In [20]:
# Both flags enabled: the layer returns the hidden state of every time step
# plus the hidden and cell state of the last time step.
input = Input(shape=(timesteps, features))
all_hidden_states, hidden_state, cell_state = LSTM(
    units=LSTMunits,
    return_sequences=True,
    return_state=True,
)(input)
model4 = Model(inputs=input, outputs=[all_hidden_states, hidden_state, cell_state])
model4.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 3, 4)]            0         
                                                                 
 lstm_3 (LSTM)               [(None, 3, 5),            200       
                              (None, 5),                         
                              (None, 5)]                         
                                                                 
Total params: 200
Trainable params: 200
Non-trainable params: 0
_________________________________________________________________


In [27]:
# model4 has three outputs: the hidden state of every time step, the hidden
# state of the last time step, and the cell state of the last time step.
results = model4.predict(X)
all_hidden_states, hidden_state, cell_state = results

print('input shape:  ', X.shape)
# NOTE: the first array is 3D (batch, time steps, units); only the last two are 2D.
print('result is three 2D-array len (results): ', len (results))
print('\nall_hidden_states is in the first array: ', all_hidden_states.shape)
# Fixed: the original literal contained the invalid escape "\h", which printed
# a stray backslash and triggers an invalid-escape warning on modern Python.
print('\nhidden_states of each time steps : \n\n', all_hidden_states)
print('\nhidden_state of last time step: ', hidden_state.shape)
print('\nhidden_state: ', hidden_state)
print('\ncell_state of last time step: ', cell_state)

input shape:   (1, 3, 4)
result is three 2D-array len (results):  3

all_hidden_states is in the first array:  (1, 3, 5)

\hidden_states of each time steps : 

 [[[-0.21957731  0.20638798 -0.03843133  0.06899611  0.08460483]
  [-0.29204807  0.26578736  0.12555666 -0.07890295  0.18245336]
  [-0.489339    0.3030822  -0.07513437 -0.452564    0.23820889]]]

hidden_state of last time step:  (1, 5)

hidden_state:  [[-0.489339    0.3030822  -0.07513437 -0.452564    0.23820889]]

cell_state of last time step:  [[-1.0738897   0.5033318  -0.1135129  -0.7409135   0.35720763]]
