# Example of data transformations for time series forecasting with Keras

In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt
from collections import UserDict
%matplotlib inline

pd.options.display.float_format = '{:,.2f}'.format
np.set_printoptions(precision=2)

Load some data and create features

In [2]:
%run -i common/load_data.py
%run -i common/TimeSeriesTensor.py

In [3]:
if not os.path.exists(os.path.join('data', 'energy.csv')):
    %run common/extract_data.py
energy = load_data()

In [4]:
def get_holidays(energy):
    """Return a copy of ``energy`` with the holiday table joined on by calendar date.

    The holiday table is read from ``data/us_holidays.csv``. Its ``Date`` column
    is parsed as day/month/year. Every other column of that file is added to the
    result (presumably a single ``Holiday`` column -- confirm against the CSV).
    Rows whose date has no entry in the table get null values in those columns.

    Parameters:
        energy: dataframe whose index holds datetime values.

    Returns:
        A new dataframe with the same rows, in the same order, indexed by the
        original timestamps (index name ``timestamp``). ``energy`` itself is
        left untouched.
    """
    # Work on a copy so the helper columns below never leak into the caller's frame
    frame = energy.copy()

    # pd.merge discards the left index, so park the timestamps in a column first
    frame['timestamp'] = frame.index
    # Midnight of each timestamp's calendar day: this is the join key
    frame['Date'] = pd.to_datetime(frame['timestamp'].dt.date)

    hols = pd.read_csv(os.path.join('data', 'us_holidays.csv'))
    hols['Date'] = pd.to_datetime(hols['Date'], format='%d/%m/%Y')

    # Left join keeps every energy row, holiday or not
    merged = pd.merge(frame, hols, on='Date', how='left')

    # Restore the time index and remove the two helper columns
    merged.index = merged['timestamp']
    return merged.drop(['timestamp', 'Date'], axis=1)

In [5]:
energy = get_holidays(energy)

In [6]:
# Rows with a null 'Holiday' value become 0, every other row becomes 1
energy['Holiday'] = np.where(energy['Holiday'].isnull(), 0, 1)
# Constant region code: every row gets the same value
energy['Region'] = 0

This dataset has the features:
- load (the electricity load to be forecasted)
- temp (historical temperature values)
- Holiday (1 or 0 to indicate public holiday)
- Region (categorical feature for the region. In this dataset we just have one region '0')

In [7]:
energy[12:36]

Unnamed: 0_level_0,load,temp,Holiday,Region
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-01 12:00:00,3184.0,43.67,0,0
2012-01-01 13:00:00,3147.0,43.33,0,0
2012-01-01 14:00:00,3122.0,45.0,0,0
2012-01-01 15:00:00,3137.0,44.67,0,0
2012-01-01 16:00:00,3486.0,43.33,0,0
2012-01-01 17:00:00,3717.0,41.67,0,0
2012-01-01 18:00:00,3659.0,41.33,0,0
2012-01-01 19:00:00,3513.0,41.67,0,0
2012-01-01 20:00:00,3344.0,42.67,0,0
2012-01-01 21:00:00,3129.0,43.33,0,0


Let's say we want to create an encoder-decoder RNN forecasting model. The encoder will use a sequence of the 3 most recent values of load, temperature and holiday info as input. The decoder uses a sequence of load and holiday info as input and makes predictions over a forecast horizon of 2. The region feature is static (common to all time steps) and will be passed through a dense layer and then replicated across each time step of both the encoder and decoder.

![Encoder-decoder](./images/encoder-decoder%20example.png "Encoder-decoder")

The structure of the network is not important but it does illustrate how complex the inputs to these models can be. In reality it is unnecessary to force the static region feature through a replicated dense layer as it is a categorical feature of size 1. However, in situations such as product sales forecasting we may have a large vector of static categorical features (storeID, product attributes, etc.) that we should condense before input to the RNN layers.

To prepare data for Keras, we need to create four separate numpy arrays with shapes:
- **target** (samples, 2)
- **region** (samples, 1)
- **encoder** (samples, 3, 3)
- **decoder** (samples, 2, 2)

Some sets of features need to be shifted forwards and some backwards to gather them into a single sample. It is difficult to do this with numpy arrays and even more difficult to sense check that the result is correct.

The TimeSeriesTensor class is intended to make the transformation from the initial time series into these arrays easy. The user defines the array structures when instantiating the object. The class then gathers all inputs for one sample into a single row of a pandas dataframe. It also transforms this dataframe into appropriately shaped numpy arrays, ready for input to the model. The user can inspect the dataframe to check that the resulting data structure is correct.

In [8]:
forecast_horizon = 2  # number of future time steps to predict
input_seq_len = 3  # number of most recent time steps given to the encoder

In [9]:
# Each entry maps a tensor name to (time-step offsets relative to t, list of feature columns)
tensor_structure = {'encoder':(range(-input_seq_len+1, 1), ['load', 'temp', 'Holiday']), # encoder features at offsets t-2, t-1 and t (for input_seq_len = 3)
                    'decoder':(range(1, forecast_horizon+1), ['load', 'Holiday']), # decoder features at offsets t+1 and t+2 (for forecast_horizon = 2)
                    'region':(None, ['Region'])} # include region feature as is (no shifting required)

In [None]:
# %load common/TimeSeriesTensor.py
class TimeSeriesTensor(UserDict):
    """A dictionary of tensors for input into the RNN model.

    Use this class to:
      1. Shift the values of the time series to create a Pandas dataframe
         containing all the data for a single training example
      2. Discard any samples with missing values
      3. Transform this Pandas dataframe into numpy arrays of shape
         (samples, time steps, features) for input into Keras

    The class takes the following parameters:
       - dataset: original time series (a Pandas dataframe; its index is
             shifted by ``freq``, so it needs a datetime-like index)
       - target: name of the column in ``dataset`` that is to be forecasted
       - H: the forecast horizon
       - tensor_structure: a dictionary describing the tensor structure of the form
             { 'tensor_name' : (range(max_backward_shift, max_forward_shift), [feature, feature, ...] ) }
             if features are non-sequential and should not be shifted, use the form
             { 'tensor_name' : (None, [feature, feature, ...])}
       - freq: time series frequency
       - drop_incomplete: (Boolean) whether to drop incomplete samples

    After construction the shifted samples are available as ``self.dataframe``
    and the numpy arrays can be read by key, e.g. ``obj['target']``.
    """

    def __init__(self, dataset, target, H, tensor_structure, freq='H', drop_incomplete=True):
        # Let UserDict set itself up first; self.data is replaced with the tensors below
        super().__init__()
        self.dataset = dataset
        self.target = target
        self.tensor_structure = tensor_structure
        self.tensor_names = list(tensor_structure.keys())

        self.dataframe = self._shift_data(H, freq, drop_incomplete)
        self.data = self._df2tensors(self.dataframe)

    def _shift_data(self, H, freq, drop_incomplete):
        """Shift the dataset features as described by ``tensor_structure``.

        The result is a Pandas dataframe, indexed like the dataset, with
        multi-index columns in the hierarchy
            tensor - the name of the input tensor
            feature - the input feature to be shifted
            time step - the time step for the RNN in which the data is input.
                These labels are centred on time t, the forecast creation time.

        The first H columns are the target at t+1 ... t+H, under the tensor
        name 'target' and feature name 'y'. When ``drop_incomplete`` is true,
        rows containing any missing value are removed.
        """
        source = self.dataset
        # Collect the new columns in a fresh frame under positional temporary
        # labels. That way they cannot clash with each other (e.g. the same
        # static feature used by two tensors) or with a dataset column.
        shifted = pd.DataFrame(index=source.index)
        idx_tuples = []

        def add_column(values, label):
            # Assignment aligns `values` on the dataset's index, so timestamps
            # that only exist after shifting are discarded
            shifted[len(idx_tuples)] = values
            idx_tuples.append(label)

        for t in range(1, H+1):
            # Shifting the index back by t steps puts the value observed at t+k on row t
            add_column(source[self.target].shift(t*-1, freq=freq), ('target', 'y', 't+'+str(t)))

        for name, structure in self.tensor_structure.items():
            rng, dataset_cols = structure[0], structure[1]

            for col in dataset_cols:
                if rng is None:
                    # do not shift non-sequential 'static' features
                    add_column(source[col], (name, col, 'static'))
                else:
                    for t in rng:
                        # Labels read 't-2', 't-1', 't', 't+1', ...
                        sign = '+' if t > 0 else ''
                        shift = str(t) if t != 0 else ''
                        add_column(source[col].shift(t*-1, freq=freq), (name, col, 't'+sign+shift))

        shifted.columns = pd.MultiIndex.from_tuples(idx_tuples, names=['tensor', 'feature', 'time step'])

        if drop_incomplete:
            shifted = shifted.dropna(how='any')

        return shifted

    def _df2tensors(self, dataframe):
        """Transform the shifted Pandas dataframe into multidimensional numpy arrays.

        These arrays can be used as input to the keras model and can be
        accessed by tensor name. For example, for a TimeSeriesTensor object
        named "model_inputs" and a tensor named "target", the input tensor can
        be accessed with model_inputs['target'].

        Returns a dict with the key 'target' (shape (samples, H)) plus one key
        per entry of ``tensor_structure``: shape (samples, features) for static
        tensors and (samples, time steps, features) for sequential ones.
        """
        # .values replaces DataFrame.as_matrix(), which no longer exists in pandas >= 1.0
        inputs = {'target': dataframe['target'].values}

        for name, structure in self.tensor_structure.items():
            rng, cols = structure[0], structure[1]
            tensor = dataframe[name][cols].values
            if rng is None:
                tensor = tensor.reshape(tensor.shape[0], len(cols))
            else:
                # Columns are laid out feature by feature, each with its time
                # steps in order: unpack to (samples, features, time steps),
                # then swap the last two axes
                tensor = tensor.reshape(tensor.shape[0], len(cols), len(rng))
                tensor = np.transpose(tensor, axes=[0, 2, 1])
            inputs[name] = tensor

        return inputs

    def subset_data(self, new_dataframe):
        """Recreate the input tensors after the shifted dataframe has been filtered.

        ``new_dataframe`` replaces ``self.dataframe`` and must keep the same
        column structure.
        """
        self.dataframe = new_dataframe
        self.data = self._df2tensors(self.dataframe)

Create the model inputs

In [11]:
# Build the shifted dataframe and the numpy tensors in one step;
# drop_incomplete=True discards every sample that has a missing value
model_inputs = TimeSeriesTensor(dataset=energy,
                                 target='load',
                                 H=forecast_horizon,
                                 tensor_structure=tensor_structure,
                                 drop_incomplete=True)

Check the result with the dataframe. The dataframe is indexed on time *t* (forecast creation time) and one row represents one sample. The dataframe has multi-indexed columns to make it easier to check that the inputs for each tensor have been shifted correctly.

In [12]:
model_inputs.dataframe.head(24)

tensor,target,target,region,encoder,encoder,encoder,encoder,encoder,encoder,encoder,encoder,encoder,decoder,decoder,decoder,decoder
feature,y,y,Region,load,load,load,temp,temp,temp,Holiday,Holiday,Holiday,load,load,Holiday,Holiday
time step,t+1,t+2,static,t-2,t-1,t,t-2,t-1,t,t-2,t-1,t,t+1,t+2,t+1,t+2
timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
2012-01-01 02:00:00,2402.0,2403.0,0,2698.0,2558.0,2444.0,32.0,32.67,30.0,0.0,0.0,0,2402.0,2403.0,0.0,0.0
2012-01-01 03:00:00,2403.0,2453.0,0,2558.0,2444.0,2402.0,32.67,30.0,31.0,0.0,0.0,0,2403.0,2453.0,0.0,0.0
2012-01-01 04:00:00,2453.0,2560.0,0,2444.0,2402.0,2403.0,30.0,31.0,32.0,0.0,0.0,0,2453.0,2560.0,0.0,0.0
2012-01-01 05:00:00,2560.0,2719.0,0,2402.0,2403.0,2453.0,31.0,32.0,31.33,0.0,0.0,0,2560.0,2719.0,0.0,0.0
2012-01-01 06:00:00,2719.0,2916.0,0,2403.0,2453.0,2560.0,32.0,31.33,30.0,0.0,0.0,0,2719.0,2916.0,0.0,0.0
2012-01-01 07:00:00,2916.0,3105.0,0,2453.0,2560.0,2719.0,31.33,30.0,29.0,0.0,0.0,0,2916.0,3105.0,0.0,0.0
2012-01-01 08:00:00,3105.0,3174.0,0,2560.0,2719.0,2916.0,30.0,29.0,29.0,0.0,0.0,0,3105.0,3174.0,0.0,0.0
2012-01-01 09:00:00,3174.0,3180.0,0,2719.0,2916.0,3105.0,29.0,29.0,33.33,0.0,0.0,0,3174.0,3180.0,0.0,0.0
2012-01-01 10:00:00,3180.0,3184.0,0,2916.0,3105.0,3174.0,29.0,33.33,36.67,0.0,0.0,0,3180.0,3184.0,0.0,0.0
2012-01-01 11:00:00,3184.0,3147.0,0,3105.0,3174.0,3180.0,33.33,36.67,40.33,0.0,0.0,0,3184.0,3147.0,0.0,0.0


The TimeSeriesTensor class is a dictionary and model inputs can be accessed by key. For example:

In [13]:
model_inputs['encoder']

array([[[ 2698.  ,    32.  ,     0.  ],
        [ 2558.  ,    32.67,     0.  ],
        [ 2444.  ,    30.  ,     0.  ]],

       [[ 2558.  ,    32.67,     0.  ],
        [ 2444.  ,    30.  ,     0.  ],
        [ 2402.  ,    31.  ,     0.  ]],

       [[ 2444.  ,    30.  ,     0.  ],
        [ 2402.  ,    31.  ,     0.  ],
        [ 2403.  ,    32.  ,     0.  ]],

       ..., 
       [[ 4319.  ,    22.33,     0.  ],
        [ 4199.  ,    20.  ,     0.  ],
        [ 4012.  ,    18.  ,     0.  ]],

       [[ 4199.  ,    20.  ,     0.  ],
        [ 4012.  ,    18.  ,     0.  ],
        [ 3856.  ,    16.67,     0.  ]],

       [[ 4012.  ,    18.  ,     0.  ],
        [ 3856.  ,    16.67,     0.  ],
        [ 3671.  ,    17.  ,     0.  ]]])

In [14]:
model_inputs['target']

array([[ 2402.,  2403.],
       [ 2403.,  2453.],
       [ 2453.,  2560.],
       ..., 
       [ 3856.,  3671.],
       [ 3671.,  3499.],
       [ 3499.,  3345.]])

Implement the model

In [15]:
from keras.models import Model, Sequential
from keras.layers import GRU, Dense, Input, Concatenate, RepeatVector, TimeDistributed, Flatten
from keras.callbacks import Callback, EarlyStopping

Using TensorFlow backend.


In [16]:
# define all model inputs (shapes exclude the batch dimension)
region_input = Input(shape=(1,))  # one static region value per sample
encoder_input = Input(shape=(input_seq_len, 3))  # 3 encoder features per time step
decoder_input = Input(shape=(forecast_horizon, 2))  # 2 decoder features per time step

In [17]:
# pass region input through dense layer and replicate across all time steps
region_dense = Dense(1, activation='relu')(region_input)
# repeat the dense output once per encoder time step and join it to the encoder features
region_dense_enc = RepeatVector(input_seq_len)(region_dense)
encoder_region_input = Concatenate()([encoder_input, region_dense_enc])

# same again for the decoder, repeated once per forecast step
region_dense_dec = RepeatVector(forecast_horizon)(region_dense)
decoder_region_input = Concatenate()([decoder_input, region_dense_dec])

In [18]:
# build encoder, retaining the final hidden state
encoder = GRU(5, return_state=True)
# with return_state=True the layer returns its output together with the final state
encoder_output, state_h = encoder(encoder_region_input)
# wrap the state in a list so it can be passed as the decoder's initial_state
encoder_states = [state_h]

In [19]:
# build decoder, initializing hidden state with encoder hidden state
# return_sequences=True keeps an output for every decoder time step
decoder = GRU(5, return_state=True, return_sequences=True)
# the decoder's own final state is not used afterwards, hence the throwaway name
decoder_output, _ = decoder(decoder_region_input, initial_state=encoder_states)

In [20]:
# add time distributed dense layer on top of decoder and flatten the output to make the predictions
# Dense(1) is applied to every decoder time step, giving one value per step
decoder_dense = TimeDistributed(Dense(1))
decoder_output = decoder_dense(decoder_output)
# flatten to one row per sample -- presumably (samples, forecast_horizon), matching the target array
decoder_flatten = Flatten()(decoder_output)

In [21]:
# inputs are ordered [region, encoder, decoder]; the flattened decoder output is the prediction
model = Model([region_input, encoder_input, decoder_input], decoder_flatten)

In [22]:
model.compile(optimizer='RMSprop', loss='mse')

In [23]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 1)            2           input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 3, 3)         0                                            
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector)  (None, 3, 1)         0           dense_1[0][0]                    
__________________________________________________________________________________________________
input_3 (I

In [24]:
# the arrays are passed in the same order as the inputs given to Model(...): region, encoder, decoder
model.fit([model_inputs['region'], model_inputs['encoder'], model_inputs['decoder']],
          model_inputs['target'],
          batch_size=32,
          epochs=5,
          verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f941c2e3ef0>