In [64]:
import tensorflow as tf
keras = tf.keras
import numpy as np 
import pandas as pd 
import sys

In [164]:
class TimeseriesGenerator(keras.utils.Sequence):
    """Utility class for generating batches of temporal data.
    This class takes in a sequence of data-points gathered at
    equal intervals, along with time series parameters such as
    stride, length of history, etc., to produce batches for
    training/validation.
    # Arguments
        data: Indexable generator (such as list or Numpy array)
            containing consecutive data points (timesteps).
            The data should be at 2D, and axis 0 is expected
            to be the time dimension.
        targets: Targets corresponding to timesteps in `data`.
            It should have same length as `data`.
        length: Length of the output sequences (in number of timesteps).
        sampling_rate: Period between successive individual timesteps
            within sequences. For rate `r`, timesteps
            `data[i]`, `data[i-r]`, ... `data[i - length]`
            are used for create a sample sequence.
        stride: Period between successive output sequences.
            For stride `s`, consecutive output samples would
            be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
        start_index: Data points earlier than `start_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation.
        end_index: Data points later than `end_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation.
        shuffle: Whether to shuffle output samples,
            or instead draw them in chronological order.
        reverse: Boolean: if `true`, timesteps in each output sample will be
            in reverse chronological order.
        batch_size: Number of timeseries samples in each batch
            (except maybe the last one).
        overlap: Number of overlap allowed in target data
        gap: Sequence Gap of target data
    # Returns
        A [Sequence](/utils/#sequence) instance.
    # Examples
    ```python
    from keras.preprocessing.sequence import TimeseriesGenerator
    import numpy as np
    data = np.array([[i] for i in range(50)])
    targets = np.array([[i] for i in range(50)])
    data_gen = TimeseriesGenerator(data, targets,
                                   length=10, sampling_rate=2,
                                   batch_size=2)
    assert len(data_gen) == 20
    batch_0 = data_gen[0]
    x, y = batch_0
    assert np.array_equal(x,
                          np.array([[[0], [2], [4], [6], [8]],
                                    [[1], [3], [5], [7], [9]]]))
    assert np.array_equal(y,
                          np.array([[10], [11]]))
    ```
    """

    def __init__(self, data, targets,
                 length,
                 sampling_rate=1,
                 length_output=1,
                 sampling_rate_output=1,
                 stride=1,
                 start_index=0,
                 end_index=None,
                 shuffle=False,
                 reverse=False,
                 batch_size=sys.maxsize,
                 augmentation=0,
                 overlap=0,
                 gap=0):

        if len(data) != len(targets):
            raise ValueError('Data and targets have to be' +
                             ' of same length. '
                             'Data length is {}'.format(len(data)) +
                             ' while target length is {}'.format(len(targets)))

        self.data = data
        self.targets = targets
        self.length = length
        self.length_output = length_output
        self.sampling_rate = sampling_rate
        self.sampling_rate_output = sampling_rate_output
        self.stride = stride
        self.start_index = start_index
        if end_index is None:
            end_index = len(data) - 1
        self.end_index = end_index
        self.shuffle = shuffle
        self.reverse = reverse
        self.batch_size = batch_size
        self.augmentation = augmentation
        self.overlap = overlap
        self.gap = gap

        # the check below the way it was before didn't make sense since the generator might be used to represent only past data too.
        # Adding one to the right side of the comparison for that very reason!
        if self.start_index + length > self.end_index + 1:
            raise ValueError('`start_index+length=%i > end_index=%i` '
                             'is disallowed, as no part of the sequence '
                             'would be left to be used as current step.'
                             % (self.start_index + length, self.end_index))

    def __len__(self):
        if self.batch_size == sys.maxsize:
            return 1
        return int((self.end_index - self.start_index - self.length + 1 - self.length_output + self.overlap + self.augmentation - self.gap)//(self.batch_size * self.stride)) + 1

    def __getitem__(self, index):
        i = self.start_index + self.length
        if index != 0:
            i = i + self.batch_size * self.stride * index
        rows = np.arange(
            i,
            min(
                i + self.batch_size * self.stride,
                self.end_index + 2 - self.length_output
            ),
            self.stride
        )
        if self.shuffle:
            np.random.shuffle(rows)

        samples = np.stack([self.data[row - self.length:row:self.sampling_rate]
                            for row in rows])
        if self.augmentation:
            augmented_rows = [row + np.random.randint(-self.augmentation, self.augmentation+1) for row in rows]
        else:
            augmented_rows = rows
        targets = np.stack([
            self.targets[
                row - self.overlap + self.gap: row + self.length_output + self.gap: self.sampling_rate_output
            ] for row in augmented_rows
        ])

        if targets.shape[1] == 1:
            targets = targets.squeeze(1)

        if self.reverse:
            return samples[:, ::-1, ...], targets
        return samples, targets

In [150]:
np.stack([data[row - lags:row + gap:sampling_rate]
                            for row in rows])

array([[[ 0, 10],
        [ 1, 20],
        [ 2, 30],
        [ 3, 40],
        [ 4, 50],
        [ 5, 60]]])

In [188]:
data = np.array([[i, (j+1)*10] for i,j in zip(range(50),range(50))])
target = np.array([[i] for i in range(50)])

start_index = 0
end_index = len(data) - 1
lags=5
sampling_rate=1
batch_size=1
output_length=1
stride=1
sampling_rate_output=1
overlap=0
gap=1

data_gen = TimeseriesGenerator(data, target,
                                length=lags, 
                                sampling_rate=sampling_rate,
                                batch_size=batch_size, 
                                overlap=overlap,
                                length_output=output_length, 
                                gap=gap,
                                stride=stride)
batch_0 = data_gen[0]                                   

In [189]:
for i in range(len(data_gen)):
    x, y = data_gen[i]
    print('%s => %s' % (x.squeeze(), y.squeeze()))

[[ 0 10]
 [ 1 20]
 [ 2 30]
 [ 3 40]
 [ 4 50]] => 6
[[ 1 20]
 [ 2 30]
 [ 3 40]
 [ 4 50]
 [ 5 60]] => 7
[[ 2 30]
 [ 3 40]
 [ 4 50]
 [ 5 60]
 [ 6 70]] => 8
[[ 3 40]
 [ 4 50]
 [ 5 60]
 [ 6 70]
 [ 7 80]] => 9
[[ 4 50]
 [ 5 60]
 [ 6 70]
 [ 7 80]
 [ 8 90]] => 10
[[  5  60]
 [  6  70]
 [  7  80]
 [  8  90]
 [  9 100]] => 11
[[  6  70]
 [  7  80]
 [  8  90]
 [  9 100]
 [ 10 110]] => 12
[[  7  80]
 [  8  90]
 [  9 100]
 [ 10 110]
 [ 11 120]] => 13
[[  8  90]
 [  9 100]
 [ 10 110]
 [ 11 120]
 [ 12 130]] => 14
[[  9 100]
 [ 10 110]
 [ 11 120]
 [ 12 130]
 [ 13 140]] => 15
[[ 10 110]
 [ 11 120]
 [ 12 130]
 [ 13 140]
 [ 14 150]] => 16
[[ 11 120]
 [ 12 130]
 [ 13 140]
 [ 14 150]
 [ 15 160]] => 17
[[ 12 130]
 [ 13 140]
 [ 14 150]
 [ 15 160]
 [ 16 170]] => 18
[[ 13 140]
 [ 14 150]
 [ 15 160]
 [ 16 170]
 [ 17 180]] => 19
[[ 14 150]
 [ 15 160]
 [ 16 170]
 [ 17 180]
 [ 18 190]] => 20
[[ 15 160]
 [ 16 170]
 [ 17 180]
 [ 18 190]
 [ 19 200]] => 21
[[ 16 170]
 [ 17 180]
 [ 18 190]
 [ 19 200]
 [ 20 210]] => 22


In [153]:
generator = keras.preprocessing.sequence.TimeseriesGenerator(data, target, length=lags, batch_size=2)
data_gen[-1][1]
generator[-1][1]

array([[3],
       [4]])

In [194]:
model = keras.Sequential()
model.add(
    keras.layers.SimpleRNN(
        100, 
        activation='relu', 
        input_shape=(lags, 2)))
model.add(
    keras.layers.Dense(
        100, 
        activation='relu', 
        input_shape=(lags, 2)))
model.add(
    keras.layers.Dense(
        50, 
        activation='relu', 
        input_shape=(lags, 2)))
model.add(keras.layers.Dense(output_length))
model.compile(optimizer='adam', loss="mse",metrics='mape')
model.fit(data_gen, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x2372eaa4490>

In [195]:
model.predict(data_gen)

array([[ 5.914128 ],
       [ 6.864224 ],
       [ 7.8688483],
       [ 8.907427 ],
       [ 9.9184885],
       [10.935935 ],
       [12.051764 ],
       [13.1500435],
       [14.240685 ],
       [15.323388 ],
       [16.405249 ],
       [17.489267 ],
       [18.576496 ],
       [19.664663 ],
       [20.752834 ],
       [21.841003 ],
       [22.924736 ],
       [24.001163 ],
       [25.077589 ],
       [26.15401  ],
       [27.230442 ],
       [28.306862 ],
       [29.383291 ],
       [30.459711 ],
       [31.53614  ],
       [32.612564 ],
       [33.68899  ],
       [34.76541  ],
       [35.841835 ],
       [36.91826  ],
       [37.994698 ],
       [39.071117 ],
       [40.147545 ],
       [41.22396  ],
       [42.30039  ],
       [43.37682  ],
       [44.453247 ],
       [45.529667 ],
       [46.606102 ],
       [47.682507 ],
       [48.75895  ],
       [49.83536  ],
       [50.911797 ],
       [51.98823  ]], dtype=float32)