question: how to handle non-stationarity (the target signal's amplitude grows over time)?

question: multi-step forward prediction -- https://machinelearningmastery.com/multi-step-time-series-forecasting/

question: why stateful?

## Define Hyper-Parameters

In [1]:
%matplotlib notebook

In [2]:
'''
LSTM playground
'''

from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Dense, SimpleRNN, LSTM, Dropout
from keras.models import Sequential
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [119]:
# Hyper-parameters shared by the batching, model-definition and training cells.
trainingParams = {
    'batchSize': 16,                # sequences per batch
    'unrolledTimesteps': 200,       # input samples per sequence
    'inputDimensionality': 1,       # features per time step
    'forwardSamplesToPredict': 10,  # samples to predict after each sequence
    'trainEpochs': 100,             # passes over the training batches
}

## Generate Target Dataset

In [120]:
# Synthetic target signal: a cosine whose amplitude grows linearly with x.
xRange = np.linspace(0, 25 * np.pi, num=250000)
targetData = (np.cos(xRange) * xRange) / 20

In [121]:
# Plot the full synthetic signal (plt is already imported in the notebook's import cell).
fig, ax = plt.subplots()
ax.plot( targetData )
ax.set( title='Synthetic target signal', xlabel='sample index', ylabel='amplitude' )
plt.show()

<IPython.core.display.Javascript object>

## Generate Training Matrix -- Input/Output Batches
a.k.a. sequence subsampling -- break the long sequence up into batches of windows, each `unrolledTimesteps` samples long

In [122]:
def get_batched_data(inputData, trainingParams):
    """Cut one long sequence into batches of input windows and forward targets.

    The data is consumed in consecutive, non-overlapping windows of
    ``unrolledTimesteps`` samples; ``batchSize`` consecutive windows form one
    batch. The target of a window is the ``forwardSamplesToPredict`` samples
    that immediately follow it.

    Parameters
    ----------
    inputData : numpy.ndarray
        1-D array of samples, or 2-D array of shape (samples, features).
        A 1-D array is treated as a single feature column.
    trainingParams : dict
        Must provide 'batchSize', 'unrolledTimesteps', 'inputDimensionality'
        and 'forwardSamplesToPredict'.

    Returns
    -------
    batchedInputMatrix : numpy.ndarray
        Shape (batches, batchSize, unrolledTimesteps, inputDimensionality).
    batchedOutputMatrix : numpy.ndarray
        Shape (batches, batchSize, forwardSamplesToPredict).

    Notes
    -----
    Only batches whose last target window lies completely inside the data are
    produced; trailing samples that cannot form such a batch are dropped.
    Targets are read with ``inputData[...].flatten()``, which matches the
    target width only for single-column data.
    """
    if len(inputData.shape) == 1:
        inputData = np.expand_dims( inputData, 1)

    batchSize = int( trainingParams['batchSize'] )
    unrolledTimesteps = int( trainingParams['unrolledTimesteps'] )
    inputDimensionality = int( trainingParams['inputDimensionality'] )
    forwardSamplesToPredict = int( trainingParams['forwardSamplesToPredict'] )

    RNNBatchSamples = batchSize * unrolledTimesteps

    # Reserve room for the final target window: without this, data whose length
    # is an exact multiple of RNNBatchSamples yields a last target slice that
    # runs past the end of the array and the assignment below fails.
    totalBatchesInDataset = max( 0, ( len(inputData) - forwardSamplesToPredict ) // RNNBatchSamples )

    batchedInputMatrix = np.zeros( ( totalBatchesInDataset,
                                     batchSize,
                                     unrolledTimesteps,
                                     inputDimensionality ) )

    batchedOutputMatrix = np.zeros( ( totalBatchesInDataset,
                                      batchSize,
                                      forwardSamplesToPredict ) )

    for iBatch in range( totalBatchesInDataset ):
        startIndex = iBatch * RNNBatchSamples
        endIndex = startIndex + RNNBatchSamples

        # inputs: one (batchSize, unrolledTimesteps) slab per feature column
        for iInputDimension in range( inputDimensionality ):
            batchedInputMatrix[iBatch, :, :, iInputDimension] = np.reshape(
                inputData[ startIndex:endIndex, iInputDimension ],
                ( batchSize, unrolledTimesteps ) )

        # targets: the samples right after each window; they do not depend on
        # the feature index, so they are filled once per batch
        for iBatchElement in range( batchSize ):
            batchElementEnd = startIndex + unrolledTimesteps * (iBatchElement + 1)
            batchElementPredictedTarget = batchElementEnd + forwardSamplesToPredict
            batchedOutputMatrix[iBatch, iBatchElement] = inputData[ batchElementEnd : batchElementPredictedTarget ].flatten()

    return batchedInputMatrix, batchedOutputMatrix

In [123]:
# Batch the synthetic signal into input windows and their forward-prediction targets.
batchedInputMatrix, batchedOutputMatrix = get_batched_data ( targetData, trainingParams )

In [124]:
# Input layout: (batches, batchSize, unrolledTimesteps, inputDimensionality).
batchedInputMatrix.shape

(78, 16, 200, 1)

In [125]:
# Target layout: (batches, batchSize, forwardSamplesToPredict).
batchedOutputMatrix.shape

(78, 16, 10)

In [126]:
# First input window of the first batch.
batchedInputMatrix[0,0]

array([[0.00000000e+00],
       [1.57080253e-05],
       [3.14160460e-05],
       [4.71240574e-05],
       [6.28320548e-05],
       [7.85400336e-05],
       [9.42479892e-05],
       [1.09955917e-04],
       [1.25663812e-04],
       [1.41371670e-04],
       [1.57079486e-04],
       [1.72787255e-04],
       [1.88494974e-04],
       [2.04202636e-04],
       [2.19910238e-04],
       [2.35617775e-04],
       [2.51325243e-04],
       [2.67032635e-04],
       [2.82739949e-04],
       [2.98447179e-04],
       [3.14154321e-04],
       [3.29861369e-04],
       [3.45568320e-04],
       [3.61275169e-04],
       [3.76981911e-04],
       [3.92688541e-04],
       [4.08395054e-04],
       [4.24101447e-04],
       [4.39807714e-04],
       [4.55513852e-04],
       [4.71219854e-04],
       [4.86925716e-04],
       [5.02631435e-04],
       [5.18337004e-04],
       [5.34042421e-04],
       [5.49747679e-04],
       [5.65452774e-04],
       [5.81157702e-04],
       [5.96862457e-04],
       [6.12567036e-04],


In [127]:
# Prediction target belonging to the first input window of the first batch.
batchedOutputMatrix[0,0]

array([0.00313541, 0.00315102, 0.00316663, 0.00318225, 0.00319786,
       0.00321347, 0.00322908, 0.00324469, 0.0032603 , 0.0032759 ])

In [128]:
# Visualise one training example: an input window and the samples the model must predict.
# Dedicated names are used so the full signal stored in `targetData` is not overwritten;
# otherwise re-running the batching cell afterwards would batch this 10-sample slice.
exampleInput = batchedInputMatrix[0, 10]
exampleTarget = batchedOutputMatrix[0, 10]

inputSteps = np.arange(len(exampleInput))
# target samples are drawn directly after the input window on the time axis
targetSteps = np.arange(len(exampleInput), len(exampleInput)+len(exampleTarget))

plt.figure()
plt.plot(inputSteps, exampleInput, 'x')
plt.plot(targetSteps, exampleTarget, 'xr')
plt.legend(['input','prediction target'])
plt.show()

<IPython.core.display.Javascript object>

## Define Model Architecture

https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/

## Model Statefulness (from Keras documentation)

<b>Note on using statefulness in RNNs</b> -- source: https://keras.io/layers/recurrent/

You can set RNN layers to be 'stateful', which means that the states computed for the samples in one batch will be reused as initial states for the samples in the next batch. This assumes a one-to-one mapping between samples in different successive batches.

To enable statefulness:

- specify `stateful=True` in the layer constructor.
- specify a fixed batch size for your model: for a sequential model, pass `batch_input_shape=(...)` to the first layer in your model; for a functional model with one or more Input layers, pass `batch_shape=(...)` to all the first layers in your model. This is the expected shape of your inputs, including the batch size. It should be a tuple of integers, e.g. `(32, 10, 100)`.
- specify `shuffle=False` when calling `fit()`.

To reset the states of your model, call .reset_states() on either a specific layer, or on your entire model.


## Simple Dense Model

In [129]:
print('creating model')

# Fully-connected baseline operating on a flat window of unrolledTimesteps samples:
#   1. hidden layer with 25 units (no activation is specified)
#   2. linear output layer with one unit per sample to predict
simpleDenseModel = Sequential([
    Dense( 25, input_dim=trainingParams['unrolledTimesteps'] ),
    Dense( trainingParams['forwardSamplesToPredict'], activation='linear' ),
])

simpleDenseModel.compile(optimizer='adam', loss='mse')

creating model


## Simple non-Stateful LSTM Model

In [None]:
print('creating model')

# Single LSTM layer followed by a small dense head.
#   1. LSTM, 10 units, fixed batch shape, state is not carried between batches;
#      return_sequences=False because no further recurrent layer is stacked on top
#   2. sigmoid dense layer, 10 units
#   3. linear output layer with one unit per sample to predict
simpleNonStatefulModel = Sequential([
    LSTM( 10,
          batch_input_shape=( trainingParams['batchSize'],
                              trainingParams['unrolledTimesteps'],
                              trainingParams['inputDimensionality'] ),
          return_sequences=False,
          stateful=False ),
    Dense( 10, activation='sigmoid' ),
    Dense( trainingParams['forwardSamplesToPredict'], activation='linear' ),
])

simpleNonStatefulModel.compile(optimizer='adam', loss='mse')

## Simple Stateful Model

In [None]:
print('creating model')

# Same topology as the non-stateful variant, but the LSTM is built with stateful=True.
#   1. LSTM, 10 units, fixed batch shape;
#      return_sequences=False because no further recurrent layer is stacked on top
#   2. sigmoid dense layer, 10 units
#   3. linear output layer with one unit per sample to predict
simpleStatefulModel = Sequential([
    LSTM( 10,
          batch_input_shape=( trainingParams['batchSize'],
                              trainingParams['unrolledTimesteps'],
                              trainingParams['inputDimensionality'] ),
          return_sequences=False,
          stateful=True ),
    Dense( 10, activation='sigmoid' ),
    Dense( trainingParams['forwardSamplesToPredict'], activation='linear' ),
])

simpleStatefulModel.compile(optimizer='adam', loss='mse')

## Complex* Stateful Model

In [144]:
print('creating model')
complexStatefulModel = Sequential()

# 1st lstm layer: fixes the batch shape for the whole model and hands the full
# output sequence to the next recurrent layer (return_sequences=True)
complexStatefulModel.add( LSTM( 35,
                batch_input_shape = ( trainingParams['batchSize'],
                                      trainingParams['unrolledTimesteps'],
                                      trainingParams['inputDimensionality'] ),
                return_sequences = True, # needed because another LSTM layer is stacked on top
                stateful = True ) )
complexStatefulModel.add( Dropout(.2) )

# 2nd lstm layer: statefulness is a per-layer setting, so it has to be requested
# here as well; otherwise this layer starts from a fresh state on every batch
# while the first one does not
complexStatefulModel.add( LSTM( 35, stateful = True ) )
complexStatefulModel.add( Dropout(.2) )

# dense layer
complexStatefulModel.add( Dense( 20, activation='sigmoid') )
complexStatefulModel.add( Dropout(.2) )

# output layer: one linear unit per sample to predict
complexStatefulModel.add( Dense( trainingParams['forwardSamplesToPredict'], activation='linear') )

# compile
complexStatefulModel.compile(loss='mse', optimizer='adam')

creating model


## Select Model

In [145]:
# Single switch for the model under test. The flag also decides whether the input
# is flattened for the dense network, so the model is derived from it here and the
# two can no longer be set inconsistently.
denseNet = False
model = simpleDenseModel if denseNet else complexStatefulModel

## Training Loop

In [None]:
!pip install sklearn

In [146]:
# Callbacks used by the training cell.
early_stopping = EarlyStopping( monitor = 'val_loss', patience = 15)
checkpointer = ModelCheckpoint( filepath = 'best_weights.hdf5', verbose = 0, save_best_only = True)

# The dense network takes flat windows, so drop the trailing feature axis only.
# A bare squeeze() would also remove the batch axes whenever there is a single
# batch or batchSize == 1.
if denseNet:
    inputMatrix = batchedInputMatrix.squeeze(axis=-1)
else:
    inputMatrix = batchedInputMatrix

from sklearn.model_selection import train_test_split
# Whole batches (first axis) are assigned to train or test; train_test_split
# shuffles them by default.
# NOTE(review): shuffled batch order may conflict with the stateful models' assumption
# that successive batches continue each other -- confirm whether shuffle=False is wanted.
xTrain, xTest, yTrain, yTest = train_test_split( inputMatrix, batchedOutputMatrix, test_size=0.33, random_state=42)


In [147]:
# Shape of the complete target array before the train/test split.
batchedOutputMatrix.shape

(78, 16, 10)

In [148]:
# Shape of the training inputs returned by train_test_split.
xTrain.shape

(52, 16, 200, 1)

In [149]:
# Shape of the training targets returned by train_test_split.
yTrain.shape

(52, 16, 10)

In [150]:
# Manual training loop: every batch is passed to model.fit on its own, and the
# model states are reset once per pass over the training batches.
print('training initiated')
NBatches = xTrain.shape[0]
NEpochs = trainingParams['trainEpochs']

lossHistory = np.zeros((NEpochs, 1))

# Early stopping is tracked here, across epochs. An EarlyStopping callback passed
# to a one-epoch fit() call restarts its patience counter on every call and can
# therefore never stop the training.
bestValLoss = np.inf
epochsWithoutImprovement = 0

for iEpoch in range( NEpochs ):
    print( 'epoch ', iEpoch, ' / ', NEpochs)

    batchLoss = 0
    valLoss = 0
    for iBatch in range( NBatches ):
        history = model.fit( xTrain[iBatch], yTrain[iBatch],
                   batch_size = trainingParams['batchSize'],
                   verbose = 0,
                   epochs = 1,   # Keras 2 name of the former nb_epoch argument
                   shuffle = False,
                   callbacks = [checkpointer],
                   validation_data = (xTest[0], yTest[0]) )

        batchLoss += history.history['loss'][0]
        valLoss += history.history['val_loss'][0]

    lossHistory[iEpoch] = batchLoss/NBatches
    print('average batch loss: ', lossHistory[iEpoch])

    model.reset_states()

    # patience-based stop on the mean validation loss of this epoch
    meanValLoss = valLoss/NBatches
    if meanValLoss < bestValLoss:
        bestValLoss = meanValLoss
        epochsWithoutImprovement = 0
    else:
        epochsWithoutImprovement += 1
        if epochsWithoutImprovement >= early_stopping.patience:
            print('early stopping after epoch ', iEpoch)
            # drop the never-filled tail so the learning curve shows trained epochs only
            lossHistory = lossHistory[:iEpoch + 1]
            break

training initiated
epoch  0  /  100




average batch loss:  [2.61763938]
epoch  1  /  100
average batch loss:  [1.97900089]
epoch  2  /  100
average batch loss:  [1.57789533]
epoch  3  /  100
average batch loss:  [1.24394699]
epoch  4  /  100
average batch loss:  [1.02600708]
epoch  5  /  100
average batch loss:  [0.85515497]
epoch  6  /  100
average batch loss:  [0.68840737]
epoch  7  /  100
average batch loss:  [0.60014508]
epoch  8  /  100
average batch loss:  [0.52464332]
epoch  9  /  100
average batch loss:  [0.47366052]
epoch  10  /  100
average batch loss:  [0.41579239]
epoch  11  /  100
average batch loss:  [0.40452354]
epoch  12  /  100
average batch loss:  [0.37296267]
epoch  13  /  100
average batch loss:  [0.31133796]
epoch  14  /  100
average batch loss:  [0.29138379]
epoch  15  /  100
average batch loss:  [0.26502208]
epoch  16  /  100
average batch loss:  [0.23975198]
epoch  17  /  100
average batch loss:  [0.22951116]
epoch  18  /  100
average batch loss:  [0.21150666]
epoch  19  /  100
average batch loss:  

In [137]:
# load best weights
model.load_weights("best_weights.hdf5")
model.compile(optimizer = 'adam', loss = 'mse') # need to recompile model to be able to run prediction

## Learning Curve

In [138]:
# Average training loss per epoch, as collected in lossHistory by the training loop.
fig, ax = plt.subplots()
ax.plot(lossHistory, '-x')
plt.show()

<IPython.core.display.Javascript object>

## Evaluating Model Quality

In [139]:
# The dense network takes flat windows, so drop the trailing feature axis only.
# A bare squeeze() would also remove the batch axes whenever there is a single
# batch or batchSize == 1.
inputMatrix = batchedInputMatrix.squeeze(axis=-1) if denseNet else batchedInputMatrix

## Single Prediction

In [140]:
# Predict one batch and plot a single element of it: input window, true
# continuation and predicted continuation.
iBatch = 0
iBatchElement = 0

# Stateful layers keep their state between calls; start from a clean state so the
# prediction does not depend on whatever was run before this cell.
model.reset_states()
currentPredictions = model.predict(inputMatrix[iBatch], batch_size=trainingParams['batchSize'])

# Dedicated names are used so the full signal stored in `targetData` is not overwritten.
inputWindow = batchedInputMatrix[iBatch, iBatchElement]
targetWindow = batchedOutputMatrix[iBatch, iBatchElement]
predictedData = currentPredictions[iBatchElement]

inputSteps = np.arange(len(inputWindow))
# targets and predictions are drawn directly after the input window
forwardSteps = np.arange(len(inputWindow), len(inputWindow)+len(targetWindow))

plt.figure()
plt.plot(inputSteps, inputWindow, 'b')
plt.plot(forwardSteps, targetWindow, 'ro')
plt.plot(forwardSteps, predictedData, 'kx')
plt.legend(['input','target', 'predicted '])

plt.show()

<IPython.core.display.Javascript object>

## Batched Prediction

In [142]:
# Predict the first batches and draw every window one after another on a shared
# time axis: inputs in blue, true continuation in red, prediction in black.
maxBatchesToPlot = 20
# never index past the batches that actually exist
NBatchesToPlot = min( maxBatchesToPlot, batchedInputMatrix.shape[0] )

plt.figure()
offset = 0

# Stateful layers keep their state between calls; start from a clean state so the
# predictions do not depend on whatever was run before this cell.
model.reset_states()

for iBatch in range ( NBatchesToPlot ):
    currentPredictions = model.predict(inputMatrix[iBatch], batch_size=trainingParams['batchSize'])

    for iBatchElement in range( trainingParams['batchSize']):
        # Dedicated names are used so the full signal stored in `targetData` is not overwritten.
        inputWindow = batchedInputMatrix[iBatch, iBatchElement]
        targetWindow = batchedOutputMatrix[iBatch, iBatchElement]
        predictedData = currentPredictions[iBatchElement]

        inputSteps = np.arange(len(inputWindow)) + offset
        forwardSteps = np.arange(len(inputWindow), len(inputWindow)+len(targetWindow)) + offset

        plt.plot(inputSteps, inputWindow, 'b')
        plt.plot(forwardSteps, targetWindow, 'ro')
        plt.plot(forwardSteps, predictedData, 'kx')

        # windows are contiguous in the source signal, so the next one starts
        # where this input window ends
        offset += len(inputWindow)

# the legend labels the first three lines drawn; building it once is enough
plt.legend(['input','target', 'predicted '])

plt.show()

<IPython.core.display.Javascript object>