In [40]:
"""
Sequence Prediction using LSTMs

Given 5 concecutive numbers, predict the next one.
I used an LSTM unit with a fully connected feed-forward network at the end, because LSTM only outputs in a smaller range

I will try to play around with
- batch_size (1, 5, 10, 20)
- Dense layer width (1, 3, 5, 10)
- optimizer ('adam', 'rmsprop')
- loss ('mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error')
"""

"\nSequence Prediction using LSTMs\n\nGiven 5 concecutive numbers, predict the next one.\nI used an LSTM unit with a fully connected feed-forward network at the end, because LSTM only outputs in a smaller range\n\nI will try to play around with\n- batch_size (1, 5, 10, 20)\n- Dense layer width (1, 3, 5, 10)\n- optimizer ('adam', 'rmsprop')\n- loss ('mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error')\n"

In [41]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation

import numpy as np

# create training, validation and testing datasets

DATA_SEED = 0  # fixed so that Restart & Run All regenerates exactly the same datasets


def _make_sequences(starts, timesteps=5):
    """Build step-1 arithmetic sequences and the value that follows each one.

    Args:
        starts: 1-D array-like holding the first value of every sequence.
        timesteps: number of consecutive values placed in each input sequence.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape (examples, timesteps, 1) and
        holds start, start+1, ..., start+timesteps-1, and ``y`` has shape
        (examples, 1) and holds start+timesteps, i.e. the value to predict.
    """
    starts = np.asarray(starts).reshape(-1, 1, 1)    # (examples, 1, 1)
    offsets = np.arange(timesteps).reshape(1, -1, 1)  # (1, timesteps, 1)
    x = starts + offsets                              # broadcast to (examples, timesteps, 1)
    y = starts[:, 0, :] + timesteps                   # (examples, 1)
    return x, y


# A private generator keeps the data reproducible without touching NumPy's global RNG state.
_rng = np.random.RandomState(DATA_SEED)

# Training / validation sequences start at random points drawn uniformly from [0, 100).
x_train, y_train = _make_sequences(_rng.random_sample(10000) * 100)
x_val, y_val = _make_sequences(_rng.random_sample(80) * 100)

# One batch for testing: integer sequences starting at 0, 1, ..., 19 (no targets are kept).
x_test = _make_sequences(np.arange(20))[0]

In [42]:
def create_model(batch_size, dense_width, optimizer, loss, timesteps=5, input_dim=1):
    """Build and compile the LSTM -> Dense -> Dense regression network.

    Args:
        batch_size: batch dimension written into the LSTM's ``batch_input_shape``.
            Because it is baked into the model, data is expected to arrive in
            batches of this size (NOTE(review): this looks like the reason the
            notebook predicts on ``x_test[:10]`` for batch-size-10 models — confirm).
        dense_width: number of units in the hidden Dense layer.
        optimizer: optimizer handed to ``model.compile`` (e.g. 'adam', 'rmsprop').
        loss: loss handed to ``model.compile`` (e.g. 'mean_squared_error').
        timesteps: length of each input sequence; defaults to 5, the value that
            was previously hard-coded.
        input_dim: number of features per timestep; defaults to 1, the value
            that was previously hard-coded.

    Returns:
        The compiled ``Sequential`` model. No metrics are configured.
    """
    model = Sequential()
    # A single LSTM unit reads the sequence; input shape is (batch_size, timesteps, input_dim).
    model.add(LSTM(1, batch_input_shape=(batch_size, timesteps, input_dim)))
    # Hidden layer on top of the LSTM because of the large range of values expected.
    model.add(Dense(dense_width))
    # Single linear output: the predicted next number.
    model.add(Dense(1))

    model.compile(loss=loss, optimizer=optimizer)  # metrics intentionally omitted
    return model

In [44]:
# Candidate hyperparameter values. These tuples document the search space only;
# the runs visible in this notebook pick their values by hand and never read them.
batch_sizes = (1, 5, 10, 20)
dense_widths = (1, 3, 5, 10)
optimizers = ('adam', 'rmsprop')
loss = ('mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error')

# Baseline run: batch_size=1, Dense width 1, Adam, MSE.
# With one sample per update each epoch is 10000 steps, so this run is slow
# (the recorded output was interrupted by hand during epoch 11).
batch_size = 1
# Pass the variable rather than a literal so the model and fit() cannot disagree.
model1 = create_model(batch_size, 1, 'adam', 'mean_squared_error')

model1.fit(x_train, y_train,
           batch_size=batch_size, epochs=20, shuffle=True,
           validation_data=(x_val, y_val))

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
  404/10000 [>.............................] - ETA: 2:48 - loss: 833.9845

KeyboardInterrupt: 

In [45]:
# Run: batch_size=20, Dense width 1, Adam, MSE.
batch_size = 20
model1 = create_model(
    batch_size=batch_size,
    dense_width=1,
    optimizer='adam',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a4979fa2e8>

In [46]:
# Predict the next value for all 20 integer test sequences (starting at 0..19);
# a perfect model would output 5, 6, ..., 24.
model1.predict(x_test)

array([[ 6.9220033],
       [ 7.7155104],
       [ 8.910839 ],
       [ 9.862392 ],
       [10.702709 ],
       [11.4645405],
       [12.226768 ],
       [12.977165 ],
       [13.743275 ],
       [14.524788 ],
       [15.321396 ],
       [16.132753 ],
       [16.958511 ],
       [17.798294 ],
       [18.651726 ],
       [19.518404 ],
       [20.397928 ],
       [21.289875 ],
       [22.193832 ],
       [23.109358 ]], dtype=float32)

In [47]:
# Run: batch_size=10, Dense width 1, Adam, MSE.
batch_size = 10
model1 = create_model(
    batch_size=batch_size,
    dense_width=1,
    optimizer='adam',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a4984ab898>

In [49]:
# Predict on the first 10 test sequences only (starting at 0..9; ideal outputs 5..14),
# i.e. exactly one batch for the batch-size-10 model trained in the preceding cell.
# NOTE(review): the slice is presumably needed because create_model fixes the batch
# dimension via batch_input_shape — confirm.
model1.predict(x_test[:10])

array([[55.153145],
       [55.153145],
       [55.153145],
       [55.153145],
       [55.153145],
       [55.153145],
       [55.153145],
       [55.153145],
       [55.153145],
       [55.153145]], dtype=float32)

In [51]:
# Run: batch_size=10, Dense width 1, RMSProp, MSE.
batch_size = 10
model1 = create_model(
    batch_size=batch_size,
    dense_width=1,
    optimizer='rmsprop',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a49c0be3c8>

In [52]:
# Predict on the first 10 test sequences only (starting at 0..9; ideal outputs 5..14),
# i.e. exactly one batch for the batch-size-10 model trained in the preceding cell.
model1.predict(x_test[:10])

array([[ 6.650261 ],
       [ 6.7518044],
       [ 7.6232786],
       [ 8.475551 ],
       [ 9.325229 ],
       [10.181229 ],
       [11.0598545],
       [11.970317 ],
       [12.914243 ],
       [13.888126 ]], dtype=float32)

In [53]:
# Run: batch_size=10, Dense width 3, RMSProp, MSE.
batch_size = 10
model1 = create_model(
    batch_size=batch_size,
    dense_width=3,
    optimizer='rmsprop',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a49cab8b38>

In [54]:
# Predict on the first 10 test sequences only (starting at 0..9; ideal outputs 5..14),
# i.e. exactly one batch for the batch-size-10 model trained in the preceding cell.
model1.predict(x_test[:10])

array([[ 4.3492937],
       [ 6.6462154],
       [ 7.57467  ],
       [ 7.9102955],
       [ 8.607642 ],
       [ 9.564816 ],
       [10.595474 ],
       [11.641147 ],
       [12.688025 ],
       [13.731581 ]], dtype=float32)

In [55]:
# Run: batch_size=10, Dense width 3, RMSProp, mean absolute error.
batch_size = 10
model1 = create_model(
    batch_size=batch_size,
    dense_width=3,
    optimizer='rmsprop',
    loss='mean_absolute_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a49f381978>

In [56]:
# Predict on the first 10 test sequences only (starting at 0..9; ideal outputs 5..14),
# i.e. exactly one batch for the batch-size-10 model trained in the preceding cell.
model1.predict(x_test[:10])

array([[ 5.825353],
       [ 6.115031],
       [ 7.142874],
       [ 8.170712],
       [ 9.198555],
       [10.226398],
       [11.254238],
       [12.282081],
       [13.309918],
       [14.337771]], dtype=float32)

In [57]:
# Run: batch_size=10, Dense width 3, RMSProp, mean absolute percentage error.
batch_size = 10
model1 = create_model(
    batch_size=batch_size,
    dense_width=3,
    optimizer='rmsprop',
    loss='mean_absolute_percentage_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a4a0abff60>

In [58]:
# Predict on the first 10 test sequences only (starting at 0..9; ideal outputs 5..14),
# i.e. exactly one batch for the batch-size-10 model trained in the preceding cell.
model1.predict(x_test[:10])

array([[ 6.131521 ],
       [ 1.5297997],
       [ 5.7418137],
       [ 8.760043 ],
       [10.622157 ],
       [11.715281 ],
       [12.423816 ],
       [13.14375  ],
       [13.874999 ],
       [14.6174345]], dtype=float32)

In [59]:
# Run: batch_size=20, Dense width 3, RMSProp, MSE.
batch_size = 20
model1 = create_model(
    batch_size=batch_size,
    dense_width=3,
    optimizer='rmsprop',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a4a248af28>

In [60]:
# Predict the next value for all 20 integer test sequences (starting at 0..19);
# a perfect model would output 5, 6, ..., 24.
model1.predict(x_test)

array([[10.411889],
       [ 6.500748],
       [ 8.18046 ],
       [10.095548],
       [11.7875  ],
       [13.007392],
       [14.19679 ],
       [15.080066],
       [15.90831 ],
       [16.74871 ],
       [17.44182 ],
       [18.131783],
       [18.833862],
       [19.547955],
       [20.273989],
       [21.011871],
       [21.761494],
       [22.522728],
       [23.295464],
       [24.07959 ]], dtype=float32)

In [61]:
# Run: batch_size=20, Dense width 5, RMSProp, MSE.
batch_size = 20
model1 = create_model(
    batch_size=batch_size,
    dense_width=5,
    optimizer='rmsprop',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a4a3df70b8>

In [62]:
# Run: batch_size=20, Dense width 5, Adam, MSE
# (same as the preceding cell except for the optimizer).
batch_size = 20
model1 = create_model(
    batch_size=batch_size,
    dense_width=5,
    optimizer='adam',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a4a789ff98>

In [63]:
# Predict the next value for all 20 integer test sequences (starting at 0..19);
# a perfect model would output 5, 6, ..., 24.
model1.predict(x_test)

array([[ 5.8057785],
       [ 6.229392 ],
       [ 6.9510884],
       [ 7.7992773],
       [ 8.747124 ],
       [ 9.764968 ],
       [10.823353 ],
       [11.8959465],
       [12.967245 ],
       [14.343191 ],
       [15.364664 ],
       [16.332373 ],
       [17.292048 ],
       [18.24866  ],
       [19.206987 ],
       [20.16764  ],
       [21.130926 ],
       [22.096886 ],
       [23.065252 ],
       [24.035507 ]], dtype=float32)

In [64]:
# Run: batch_size=20, Dense width 10, Adam, MSE.
batch_size = 20
model1 = create_model(
    batch_size=batch_size,
    dense_width=10,
    optimizer='adam',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a49c08bf28>

In [65]:
# Run: batch_size=20, Dense width 10, Adam, MSE
# (identical configuration to the preceding cell, trained again from a fresh model).
batch_size = 20
model1 = create_model(
    batch_size=batch_size,
    dense_width=10,
    optimizer='adam',
    loss='mean_squared_error',
)

# 20 epochs over the shuffled training set, validating on (x_val, y_val) after each epoch.
model1.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
)

Train on 10000 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a4a80d5e10>

In [66]:
"""
Conclusions

As we can see, the results are not always consistent, but in general
More batches seems faster and more accurate (of course too many can also be a bad thing)
Adam and RMSProp seem to work fine, although Adam seemed to be slightly better (or more consistent)
Also a 3 unit wide layer seemed to be enough, whereas 5 was also working ok - one unit was not as good

Many times we had error to be ~700 which I guess is guessing the same number that is 
minimizing the squared error for the validation data - a local minimum
"""

'\nConclusions\n\nAs we can see, the results are not always consistent, but in general\nMore batches seems faster and more accurate (of course too many can also be a bad thing)\nAdam and RMSProp seem to work fine, although Adam seemed to be slightly better (or more consistent)\nAlso a 3 unit wide layer seemed to be enough, whereas 5 was also working ok - one unit was not as good\n\nMany times we had error to be ~700 which I guess is guessing the same number that is \nminimizing the squared error for the validation data - a local minimum\n'