In [1]:
import sys

import numpy as np
import pandas as pd
import tensorflow as tf

keras = tf.keras


In [2]:
class TimeseriesGenerator(object):
    """Utility class for generating batches of temporal data.

    This class takes in a sequence of data-points gathered at
    equal intervals, along with time series parameters such as
    stride, length of history, etc., to produce batches for
    training/validation. Each sample is a window of past `data`
    and each target is a window of `targets` starting at (or, with
    `overlap`, shortly before) the timestep that follows the sample.

    # Arguments
        data: Indexable generator (such as list or Numpy array)
            containing consecutive data points (timesteps).
            The data should be at 2D, and axis 0 is expected
            to be the time dimension.
        targets: Targets corresponding to timesteps in `data`.
            It should have same length as `data`.
        length: Length of the input window (in number of timesteps).
            For a current step `i` the window is `data[i - length:i]`.
        sampling_rate: Period between successive individual timesteps
            within an input window. For rate `r`, timesteps
            `data[i - length]`, `data[i - length + r]`, ... (stopping
            before `data[i]`) are used to create a sample sequence.
        length_output: Number of target timesteps returned per sample.
            For a current step `i` the target window ends just before
            `targets[i + length_output]`.
        sampling_rate_output: Period between successive timesteps
            within a target window.
        stride: Period between successive output sequences.
            For stride `s`, consecutive output samples use the
            current steps `i`, `i+s`, `i+2*s`, etc.
        start_index: Data points earlier than `start_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation.
        end_index: Data points later than `end_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation. Defaults to `len(data) - 1`.
        shuffle: Whether to shuffle the samples inside each batch,
            or instead draw them in chronological order.
        reverse: Boolean: if `true`, timesteps in each output sample will be
            in reverse chronological order (targets are left untouched).
        batch_size: Number of timeseries samples in each batch
            (except maybe the last one). The default, `sys.maxsize`,
            puts every sample in a single batch.
        augmentation: If non-zero, the current step used for the *target*
            window of each sample is shifted by a random integer drawn
            from `[-augmentation, augmentation]`.
        overlap: Number of timesteps before the current step that are
            prepended to each target window.

    # Returns
        An indexable object: `len(generator)` is the number of batches and
        `generator[k]` returns batch `k` as a `(samples, targets)` tuple.
        It is a plain Python object, not a Keras `Sequence`.

    # Examples
    ```python
    import numpy as np
    data = np.array([[i] for i in range(50)])
    targets = np.array([[i] for i in range(50)])
    data_gen = TimeseriesGenerator(data, targets,
                                   length=10, sampling_rate=2,
                                   batch_size=2)
    assert len(data_gen) == 20
    batch_0 = data_gen[0]
    x, y = batch_0
    assert np.array_equal(x,
                          np.array([[[0], [2], [4], [6], [8]],
                                    [[1], [3], [5], [7], [9]]]))
    assert np.array_equal(y,
                          np.array([[10], [11]]))
    ```
    """

    def __init__(self, data, targets,
                 length,
                 sampling_rate=1,
                 length_output=1,
                 sampling_rate_output=1,
                 stride=1,
                 start_index=0,
                 end_index=None,
                 shuffle=False,
                 reverse=False,
                 batch_size=sys.maxsize,
                 augmentation=0,
                 overlap=0):
        """Store the windowing parameters after validating the inputs.

        # Raises
            ValueError: if `data` and `targets` differ in length, or if
                `start_index + length` exceeds `end_index + 1` (no room
                for even one full input window).
        """
        if len(data) != len(targets):
            raise ValueError(
                'Data and targets have to be of same length. '
                'Data length is {} while target length is {}'
                .format(len(data), len(targets)))

        self.data = data
        self.targets = targets
        self.length = length
        self.length_output = length_output
        self.sampling_rate = sampling_rate
        self.sampling_rate_output = sampling_rate_output
        self.stride = stride
        self.start_index = start_index
        if end_index is None:
            end_index = len(data) - 1
        self.end_index = end_index
        self.shuffle = shuffle
        self.reverse = reverse
        self.batch_size = batch_size
        self.augmentation = augmentation
        self.overlap = overlap

        # The bound is `end_index + 1` (not `end_index`) on purpose: the
        # generator may be used to represent past data only, in which case
        # the input window is allowed to reach the very last data point.
        if self.start_index + length > self.end_index + 1:
            raise ValueError('`start_index+length=%i > end_index=%i` '
                             'is disallowed, as no part of the sequence '
                             'would be left to be used as current step.'
                             % (self.start_index + length, self.end_index))

    def __len__(self):
        """Return the number of batches `__getitem__` can produce.

        The count is derived from the same row range that `__getitem__`
        iterates over, so `self[len(self) - 1]` is always a valid batch.
        `overlap` and `augmentation` only affect which target timesteps are
        read, not how many samples exist, so they do not enter the count.
        """
        if self.batch_size == sys.maxsize:
            return 1
        first_row = self.start_index + self.length
        stop_row = self.end_index + 2 - self.length_output  # exclusive
        span = stop_row - first_row
        if span <= 0:
            return 0
        # ceil(span / (batch_size * stride)) using integer arithmetic.
        return int((span - 1) // (self.batch_size * self.stride)) + 1

    def __getitem__(self, index):
        """Return batch number `index` as a `(samples, targets)` tuple.

        # Arguments
            index: Batch position. Negative values count from the end.

        # Returns
            samples: stacked input windows `data[row - length:row:sampling_rate]`
                for every current step `row` of the batch (time axis
                reversed when `reverse` is set).
            targets: stacked target windows
                `targets[row - overlap:row + length_output:sampling_rate_output]`;
                when every window holds exactly one timestep that axis is
                squeezed away.

        # Raises
            IndexError: if `index` does not address an existing batch. This
                also lets `for batch in generator` terminate normally.
        """
        if index < 0:
            index += len(self)
        step = self.batch_size * self.stride
        first_row = self.start_index + self.length + step * index
        stop_row = self.end_index + 2 - self.length_output  # exclusive
        # Checked before np.arange so that an out-of-range index fails with a
        # clear IndexError instead of an empty-stack or overflow error.
        if index < 0 or first_row >= stop_row:
            raise IndexError('batch index out of range')

        rows = np.arange(first_row, min(first_row + step, stop_row), self.stride)
        if self.shuffle:
            np.random.shuffle(rows)

        samples = np.stack([self.data[row - self.length:row:self.sampling_rate]
                            for row in rows])
        if self.augmentation:
            # NOTE(review): shifted rows are not clipped, so near the edges a
            # target window may be truncated or read outside
            # [start_index, end_index] -- confirm the intended behaviour.
            augmented_rows = [row + np.random.randint(-self.augmentation, self.augmentation+1) for row in rows]
        else:
            augmented_rows = rows
        targets = np.stack([
            self.targets[
                row - self.overlap : row + self.length_output : self.sampling_rate_output
            ] for row in augmented_rows
        ])

        # Single-step targets are returned without the window axis.
        if targets.shape[1] == 1:
            targets = targets.squeeze(1)

        if self.reverse:
            return samples[:, ::-1, ...], targets
        return samples, targets

In [30]:
# Toy series: column 0 counts 0..49, column 1 is 10, 20, ..., 500.
data = np.column_stack((np.arange(50), np.arange(10, 510, 10)))
# One target per timestep, equal to the timestep number.
targets = np.arange(50).reshape(-1, 1)

# Windowing parameters, kept as notebook-level names because the cells
# below reuse them.
start_index = 0
end_index = len(data) - 1
lags = 5
sampling_rate = 1
batch_size = 2
output_length = 2
stride = 1

data_gen = TimeseriesGenerator(
    data,
    targets,
    length=lags,
    sampling_rate=sampling_rate,
    batch_size=batch_size,
    length_output=output_length,
    stride=stride,
)
batch_0 = data_gen[0]

In [28]:
# Display the first batch, a (samples, targets) tuple returned by data_gen[0].
batch_0

(array([[[ 0, 10],
         [ 1, 20],
         [ 2, 30],
         [ 3, 40],
         [ 4, 50]],
 
        [[ 1, 20],
         [ 2, 30],
         [ 3, 40],
         [ 4, 50],
         [ 5, 60]]]),
 array([[[5],
         [6]],
 
        [[6],
         [7]]]))

In [33]:
# Reproduce by hand the row selection that TimeseriesGenerator.__getitem__
# performs for batch index 0: the first current step comes right after the
# first full input window.
i = start_index + lags

# Current steps of the batch: at most `batch_size` of them, `stride` apart,
# capped so that `output_length` target timesteps still fit before the end.
rows = np.arange(
    i,
    min(
        i + batch_size * stride,
        end_index + 2 - output_length
    ),
    stride
)

In [37]:
# Build the input windows for the rows computed above: the `lags` timesteps
# that precede each current step, taking every `sampling_rate`-th one.
# NOTE(review): the saved output below shows a single sample, which does not
# match batch_size=2 from the setup cell -- re-run with Restart & Run All.
samples = np.stack([data[row - lags:row:sampling_rate]
                            for row in rows])
samples                            

array([[[ 0, 10],
        [ 1, 20],
        [ 2, 30],
        [ 3, 40],
        [ 4, 50]]])

In [39]:
# Same windows with a hard-coded sampling rate of 2 (every second timestep).
[data[row - lags:row:2]for row in rows]

[array([[ 0, 10],
        [ 2, 30],
        [ 4, 50]])]