In [None]:
# This code is inspired from https://www.kaggle.com/code/kmkarakaya/lstm-output-types-return-sequences-state

In [2]:
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Input

# 1. LSTM Default values (return_sequences = False and return_state = False)

In [3]:
# The output is 2D array is only the hidden state at the last time step.

# The first dimension is indicating the number of samples in the batch given to the LSTM layer

# The second dimension is the number of LSTM units (Hidden Size)

### Set the parameters

In [4]:
timesteps = 3  # T
features = 4   # D
LSTMunits= 5  # Hidden size
Batchsize = 1

### Create data for LSTM

In [5]:
X = np.random.randn(Batchsize, timesteps, features) # N x T x D

In [6]:
print('input shape:  ', X.shape)

input shape:   (1, 3, 4)


### Create LSTM Model

In [7]:
input =Input(shape = (timesteps, features)) # T x D
hidden = LSTM(LSTMunits) (input)
model1 = Model(inputs = input, outputs = hidden)
model1.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 3, 4)]            0         
_________________________________________________________________
lstm (LSTM)                  (None, 5)                 200       
Total params: 200
Trainable params: 200
Non-trainable params: 0
_________________________________________________________________


### Get the output from LSTM Model

In [8]:
output = model1.predict(X) # output is only the hidden state of the last time step.

print(output)
print(" ")
print('output shape: ', output.shape)      # Batchsize x Hiddensize

[[-0.02056677  0.16800289  0.0337401  -0.24020684  0.03921733]]
 
output shape:  (1, 5)


In [12]:
import tensorflow as tf
tf.config.list_physical_devices
tf.debugging.set_log_device_placement

<function tensorflow.python.eager.context.set_log_device_placement(enabled)>

In [13]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [15]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6322926339897261814
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 2956705792
locality {
  bus_id: 1
  links {
  }
}
incarnation: 8339647039417479246
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 860M, pci bus id: 0000:01:00.0, compute capability: 5.0"
]


In [16]:
import tensorflow as tf

assert tf.test.is_gpu_available()
assert tf.test.is_built_with_cuda()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


In [17]:
import tensorflow as tf
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print("Please install GPU version of TF")

Default GPU Device: /device:GPU:0


In [18]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7791446122112060286
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 2956705792
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4415757069412807661
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 860M, pci bus id: 0000:01:00.0, compute capability: 5.0"
]


# 2. LSTM (return_sequences = True)

In [None]:
# When return_sequences parameter is True, we get sequences at each time steps and for all input samples.

# The ouput is a 3D array of real numbers.

# The first dimension is indicating the number of samples in the batch given to the LSTM layer

# The second dimension is the number of time steps

# The third dimension is the dimensionality of the LSTM units (hidden size)


### Create LSTM

In [8]:
input = Input(shape = (timesteps, features))
all_hidden_states = LSTM(LSTMunits, return_sequences = True) (input)
model2 = Model(inputs = input, outputs = all_hidden_states)
model2.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 3, 4)]            0         
                                                                 
 lstm_1 (LSTM)               (None, 3, 5)              200       
                                                                 
Total params: 200 (800.00 Byte)
Trainable params: 200 (800.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Create data

In [9]:
X = np.random.randn(2, timesteps, features) # N x T x D
print('input shape:  ', X.shape)

input shape:   (2, 3, 4)


### Get output from LSTM

In [10]:
output = model2.predict(X)

print('\nall_hidden_states shape: ', output.shape)
print('\n Output: ', output)
print('\nhidden states for the first sample: \n', output[0])
print('\nhidden states for the first sample at the second time step: \n', output[0][1])


all_hidden_states shape:  (2, 3, 5)

 Output:  [[[ 0.10083209 -0.15288177  0.01566504 -0.15015815 -0.01646536]
  [ 0.24916919 -0.01497812  0.01466065  0.02890082 -0.26682353]
  [ 0.27829593  0.11492091 -0.02434807 -0.0605044  -0.2961077 ]]

 [[ 0.03940946  0.01170149  0.12448808 -0.05664926 -0.03248735]
  [-0.10095877  0.05751386 -0.31575504  0.01774889  0.25476792]
  [ 0.14235444 -0.08697105 -0.10362633 -0.06778127  0.07948091]]]

hidden states for the first sample: 
 [[ 0.10083209 -0.15288177  0.01566504 -0.15015815 -0.01646536]
 [ 0.24916919 -0.01497812  0.01466065  0.02890082 -0.26682353]
 [ 0.27829593  0.11492091 -0.02434807 -0.0605044  -0.2961077 ]]

hidden states for the first sample at the second time step: 
 [ 0.24916919 -0.01497812  0.01466065  0.02890082 -0.26682353]


# 3. LSTM (return_state=True)

In [None]:
# When return_state parameter is True, it will output the last hidden state twice and
# the last cell state as the output from LSTM layer.

# The ouput is a three 2D-arrays of real numbers.

# The first dimension is indicating the number of samples (batch size) given to the LSTM layer
# The second dimension is the dimensionality of the LSTM units

### Create a Model

In [11]:
input =Input(shape=(timesteps, features))
output, hidden, cell= LSTM(LSTMunits, return_state=True) (input)
model3 = Model(inputs = input, outputs = [output, hidden, cell])
model3.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 3, 4)]            0         
                                                                 
 lstm_2 (LSTM)               [(None, 5),               200       
                              (None, 5),                         
                              (None, 5)]                         
                                                                 
Total params: 200 (800.00 Byte)
Trainable params: 200 (800.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Create data

In [12]:
X = np.random.randn(Batchsize, timesteps, features) # N x T x D
print('input shape:  ', X.shape)

input shape:   (1, 3, 4)


### Get the output from LSTM

In [13]:
results = model3.predict(X)
results = np.array(results)
output, hidden_state, cell_state = results

print('\nresult is three 2D-array: ', results.shape)
print('\nresult: ', results)
print('\nLSTM_output is in the first array: ', results[0].shape)
print('\nLSTM_output : ', output)
print('\nhidden_state which is exactly the same with LSTM_output is in the second array: ', results[1].shape)
print('\nHidden state: ', hidden_state)
print('\nCell state: ', cell_state)


result is three 2D-array:  (3, 1, 5)

result:  [[[-0.14993031 -0.10603292  0.0033124  -0.07149383  0.03066481]]

 [[-0.14993031 -0.10603292  0.0033124  -0.07149383  0.03066481]]

 [[-0.28763914 -0.18877642  0.00679129 -0.16394566  0.05503772]]]

LSTM_output is in the first array:  (1, 5)

LSTM_output :  [[-0.14993031 -0.10603292  0.0033124  -0.07149383  0.03066481]]

hidden_state which is exactly the same with LSTM_output is in the second array:  (1, 5)

Hidden state:  [[-0.14993031 -0.10603292  0.0033124  -0.07149383  0.03066481]]

Cell state:  [[-0.28763914 -0.18877642  0.00679129 -0.16394566  0.05503772]]


# 4. LSTM (return_state = True   and    return_sequences = True)

In [None]:
# LSTM layer returns 3 results:

# (as return_sequences=True)
# the hidden states for each time step,

# (as return_state=True)
# the hidden state output for the last time step and
# the cell state for the last time step.

### Create LSTM Model

In [14]:
input = Input(shape = (timesteps, features))
all_hidden_states, hidden_state, cell_state = LSTM(LSTMunits, return_sequences = True, return_state=True) (input)
model4 = Model(inputs = input, outputs = [all_hidden_states, hidden_state, cell_state])
model4.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 3, 4)]            0         
                                                                 
 lstm_3 (LSTM)               [(None, 3, 5),            200       
                              (None, 5),                         
                              (None, 5)]                         
                                                                 
Total params: 200 (800.00 Byte)
Trainable params: 200 (800.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Create data

In [15]:
X = np.random.randn(Batchsize, timesteps, features) # N x T x D
print('input shape:  ', X.shape)

input shape:   (1, 3, 4)


### Get output from the model

In [16]:
results=model4.predict(X)
all_hidden_states, hidden_state, cell_state = results

print('\n\hidden_states of each time steps : \n\n', results[0])
print('\nhidden_state of last time step: ', results[1])
print('\ncell_state of last time step: ', results[2])


\hidden_states of each time steps : 

 [[[ 0.15782464 -0.1924259  -0.00517165  0.07699124  0.02152641]
  [ 0.13936658 -0.04136454  0.10534947  0.11949696 -0.09500428]
  [ 0.14644516  0.03773384  0.18532775  0.05499754 -0.13746053]]]

hidden_state of last time step:  [[ 0.14644516  0.03773384  0.18532775  0.05499754 -0.13746053]]

cell_state of last time step:  [[ 0.3618045   0.08639355  0.40559     0.10301916 -0.20620197]]
