In [1]:
# In a many-to-one sequence problem, we have an input where each time-step consists of multiple features.
# The output can be a single value or multiple values, one per feature in the input time-step.
# We will cover both cases in this section.

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
# Creating the Dataset
# Our dataset will contain 15 samples. Each sample will consist of 3 time-steps. Each time-step will have two features.

# Let's create two lists. The first will contain the multiples of 3 from 3 to 135, i.e. 45 elements in total.
# The second list will contain the multiples of 5 from 5 to 225.
# The second list will also contain 45 elements in total. The following script creates these two lists:

In [4]:
# First feature: the 45 multiples of 3, from 3 through 135.
X1 = np.arange(3, 136, 3)
# Second feature: the 45 multiples of 5, from 5 through 225.
X2 = np.arange(5, 226, 5)

# Show both vectors, one after the other.
print(X1, X2, sep="\n")

[  3   6   9  12  15  18  21  24  27  30  33  36  39  42  45  48  51  54
  57  60  63  66  69  72  75  78  81  84  87  90  93  96  99 102 105 108
 111 114 117 120 123 126 129 132 135]
[  5  10  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85  90
  95 100 105 110 115 120 125 130 135 140 145 150 155 160 165 170 175 180
 185 190 195 200 205 210 215 220 225]


In [5]:
# Pair the two feature vectors side by side: row i holds (X1[i], X2[i]),
# which yields a 45 x 2 array.
X = np.stack((X1, X2), axis=1)
print(X)

[[  3   5]
 [  6  10]
 [  9  15]
 [ 12  20]
 [ 15  25]
 [ 18  30]
 [ 21  35]
 [ 24  40]
 [ 27  45]
 [ 30  50]
 [ 33  55]
 [ 36  60]
 [ 39  65]
 [ 42  70]
 [ 45  75]
 [ 48  80]
 [ 51  85]
 [ 54  90]
 [ 57  95]
 [ 60 100]
 [ 63 105]
 [ 66 110]
 [ 69 115]
 [ 72 120]
 [ 75 125]
 [ 78 130]
 [ 81 135]
 [ 84 140]
 [ 87 145]
 [ 90 150]
 [ 93 155]
 [ 96 160]
 [ 99 165]
 [102 170]
 [105 175]
 [108 180]
 [111 185]
 [114 190]
 [117 195]
 [120 200]
 [123 205]
 [126 210]
 [129 215]
 [132 220]
 [135 225]]


In [6]:
# At this point the dataset has 45 rows in total and two columns.
# Group every 3 consecutive rows into one sample, giving
# 15 samples, 3 time-steps per sample, and two features per time-step.
X = np.reshape(X, (15, 3, 2))
print(X)

[[[  3   5]
  [  6  10]
  [  9  15]]

 [[ 12  20]
  [ 15  25]
  [ 18  30]]

 [[ 21  35]
  [ 24  40]
  [ 27  45]]

 [[ 30  50]
  [ 33  55]
  [ 36  60]]

 [[ 39  65]
  [ 42  70]
  [ 45  75]]

 [[ 48  80]
  [ 51  85]
  [ 54  90]]

 [[ 57  95]
  [ 60 100]
  [ 63 105]]

 [[ 66 110]
  [ 69 115]
  [ 72 120]]

 [[ 75 125]
  [ 78 130]
  [ 81 135]]

 [[ 84 140]
  [ 87 145]
  [ 90 150]]

 [[ 93 155]
  [ 96 160]
  [ 99 165]]

 [[102 170]
  [105 175]
  [108 180]]

 [[111 185]
  [114 190]
  [117 195]]

 [[120 200]
  [123 205]
  [126 210]]

 [[129 215]
  [132 220]
  [135 225]]]


In [7]:
# The output will also have 15 values, one for each of the 15 input samples.
# Each output value is the sum of the two feature values in the third time-step of the corresponding input sample.
# For example, the third time-step of the first sample has features 9 and 15, hence its output is 24.
# Likewise, the two feature values in the third time-step of the 2nd sample are 18 and 30, hence its output is 48.


In [9]:
# Targets: the sum of both features at the third time-step of each sample.
# The first sample gives 9 + 15 = 24 and every following sample adds another 24,
# ending at 135 + 225 = 360 for the 15th sample.
Y = np.arange(24, 361, 24)

In [10]:
# Solution via Simple LSTM
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

# One 50-unit LSTM layer consumes each (3 time-steps, 2 features) sample;
# a single-unit Dense layer turns the LSTM output into the scalar prediction.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(3, 2)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Train silently for 1000 epochs, with 20% of the samples held out for validation.
history = model.fit(X, Y, epochs=1000, validation_split=0.2, verbose=0)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [11]:
# A single unseen sample, written directly in the (1 sample, 3 time-steps, 2 features)
# layout that the model expects.
test_input = np.array([[[8, 51],
                        [11, 56],
                        [14, 61]]])

test_output = model.predict(x=test_input, verbose=0)
print(test_output)

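# The sum of the two features of the third time-step of the input is 14 + 61 = 75.
# In the run shown below, our model with one LSTM layer predicted 73.26, which is pretty close.
# (No random seed is set, so the exact prediction will likely differ from run to run.)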

[[73.26133]]


In [12]:
# Solution via Stacked LSTM
# Four LSTM layers of shrinking width (200 -> 100 -> 50 -> 25). Every LSTM except the
# last uses return_sequences=True so that the next LSTM still receives a full sequence.
# Two small ReLU Dense layers and a one-unit output layer follow.
model = Sequential([
    LSTM(200, activation='relu', return_sequences=True, input_shape=(3, 2)),
    LSTM(100, activation='relu', return_sequences=True),
    LSTM(50, activation='relu', return_sequences=True),
    LSTM(25, activation='relu'),
    Dense(20, activation='relu'),
    Dense(10, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Same training setup as before: 1000 silent epochs, 20% validation split.
history = model.fit(X, Y, epochs=1000, validation_split=0.2, verbose=0)

In [13]:

# Predict for the same test sample using the model currently bound to `model`.
test_output = model.predict(x=test_input, verbose=0)
print(test_output)

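# In the run shown below the output is 76.82 (the target is 75), which is about as close as the simple LSTM's 73.26.
# Results vary between runs; in other runs the stacked LSTM can do worse than the simple LSTM, which may be a sign of overfitting on such a small dataset.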

[[76.81729]]


In [14]:
# Solution via Bidirectional LSTM
from tensorflow.keras.layers import Bidirectional

# Wrap a 50-unit LSTM in Bidirectional so the sequence is processed in both
# directions, then map the result to a single output value.
model = Sequential([
    Bidirectional(LSTM(50, activation='relu'), input_shape=(3, 2)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Same training setup as the other models: 1000 silent epochs, 20% validation split.
history = model.fit(X, Y, epochs=1000, validation_split=0.2, verbose=0)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [15]:
# Predict for the same test sample using the model currently bound to `model`.
test_output = model.predict(x=test_input, verbose=0)
print(test_output)

[[54.61706]]
