##### Copyright 2019 The TensorFlow Authors.

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Time series forecasting (Forecast a multivariate time series)

In [0]:
#version 26/01
from __future__ import absolute_import, division, print_function, unicode_literals
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

In [0]:
# Colab-only cell: opens the browser upload dialog and reports each uploaded
# file with its size in bytes.
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))
  
  # Expected upload: merged_data_h.csv (read by the data-loading cell below).

In [0]:
#df = pd.read_csv('../data/merged_data_h.csv')#, header=0, parse_dates=[0], index_col=0, squeeze=True)

## The bakery sales dataset

In [0]:
# Read the sales data from the working directory with pandas' default parsing
# options (presumably the file uploaded in the Colab cell above).
df = pd.read_csv('merged_data_h.csv')#, header=0, parse_dates=[0], index_col=0, squeeze=True)

In [0]:
df.head()

In [0]:
# Report how many rows were loaded.
print('Number of entries: {}'.format(len(df)))

In [0]:
def univariate_data(dataset, start_index, end_index, history_size, target_size):
  """Slice a 1-D series into (history window, label) pairs.

  Args:
    dataset: 1-D array that can be indexed with a `range` (e.g. a NumPy array).
    start_index: first index of the region the windows are drawn from.
    end_index: exclusive upper bound for the window end position; `None`
      means `len(dataset) - target_size`.
    history_size: number of past observations in each window.
    target_size: offset of the label relative to the window end position.

  Returns:
    A `(data, labels)` tuple of NumPy arrays. Every window in `data` is
    reshaped to `(history_size, 1)`.
  """
  first = start_index + history_size
  last = len(dataset) - target_size if end_index is None else end_index

  windows = [
      # Each window goes from shape (history_size,) to (history_size, 1).
      np.reshape(dataset[range(pos - history_size, pos)], (history_size, 1))
      for pos in range(first, last)
  ]
  targets = [dataset[pos + target_size] for pos in range(first, last)]
  return np.array(windows), np.array(targets)

In [0]:
# Row index separating training from validation data: the windowing cells pass
# it as the end of the training range and the start of the validation range.
TRAIN_SPLIT = 1500

In [0]:
tf.random.set_seed(13)

## Part 2: Forecast a multivariate time series

In [0]:
# Columns fed to the model. The first entry becomes column 0 of `dataset`,
# which the windowing cells use as the prediction target (`dataset[:, 0]`).
features_considered = ['sales_brot', 'temperatur', 'weekday','bewoelkung', 'windgeschwindigkeit', 'month', 'day']
# Alternative feature sets, currently disabled:
# features_considered= ['sales_broetchen', 'temperatur', 'weekday','bewoelkung',  'month', 'day', 'lagged_holiday_dummy']
#features_considered = ['sales_broetchen', 'temperatur', 'weekday','bewoelkung',  'windgeschwindigkeit', 'month', 'day', 'lagged_holiday_dummy', 'school_holiday_total']

In [0]:
# Index the frame by its `date` column, then keep only the model columns.
features = df.set_index('date')[features_considered]
features.head()

In [0]:
features.plot(subplots=True)

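The usual first step is to standardize the dataset using the mean and standard deviation of the training data. In this notebook the standardization code below is commented out, so the model is trained on the raw, unscaled values.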

In [0]:
dataset = features.values
#data_mean = dataset[:TRAIN_SPLIT].mean(axis=0)
#data_std = dataset[:TRAIN_SPLIT].std(axis=0)

In [0]:
#dataset = (dataset-data_mean)/data_std

### Single step model
In a single step setup, the model learns to predict a single point in the future based on some history provided.

The function below performs the same windowing task as the one above; however, here it samples the past observations based on the step size given.

In [0]:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
  """Build windowed (history, label) pairs from a multivariate series.

  Args:
    dataset: array of observations, indexed along its first axis.
    target: array of label values, indexed like `dataset`.
    start_index: first index of the region the windows are drawn from.
    end_index: exclusive upper bound for the window end position `i`. `None`
      means `len(dataset) - target_size`. Larger values are clamped to that
      limit so every label stays inside `target`.
    history_size: span of past observations covered by each window.
    target_size: label offset (single step) or label length (multi step).
    step: stride used when sampling observations inside a window.
    single_step: if True the label is the single value
      `target[i + target_size]`; otherwise it is the slice
      `target[i:i + target_size]`.

  Returns:
    A `(data, labels)` tuple of NumPy arrays.
  """
  data = []
  labels = []

  start_index = start_index + history_size
  # Last window end position for which both label forms are fully in range.
  # Without this clamp an explicit end_index near the end of the data yields
  # truncated (ragged) multi-step labels or an IndexError in single-step mode.
  max_end_index = len(dataset) - target_size
  if end_index is None:
    end_index = max_end_index
  else:
    end_index = min(end_index, max_end_index)

  for i in range(start_index, end_index):
    indices = range(i - history_size, i, step)
    data.append(dataset[indices])

    if single_step:
      labels.append(target[i + target_size])
    else:
      labels.append(target[i:i + target_size])

  return np.array(data), np.array(labels)

In this notebook, the network is shown the last 360 observations of every feature (`past_history = 360`), sampled at every time step (`STEP = 1`). For the single step prediction model, the label for a datapoint is the value of the target column (`sales_brot`, the first feature) at index `i + future_target`, where the history window covers indices `i - past_history` to `i - 1` and `future_target = 1`.

In [0]:
# Windowing and training hyper-parameters.
past_history = 360         # span of past observations in each input window
future_target = 1          # passed to multivariate_data as target_size
STEP = 1                   # stride inside each history window
BATCH_SIZE = 30
BUFFER_SIZE = 1000         # shuffle buffer size for the training dataset
EVALUATION_INTERVAL = 100  # steps per training epoch
TARGET_DATA = dataset[:, 0]  # first feature column is the prediction target

# Target alternatives noted by the author: dataset[:, 0] (classic),
# dataset[-1, 0], or -1.

x_train_single, y_train_single = multivariate_data(
    dataset, TARGET_DATA,
    start_index=0, end_index=TRAIN_SPLIT,
    history_size=past_history, target_size=future_target,
    step=STEP, single_step=True)
x_val_single, y_val_single = multivariate_data(
    dataset, TARGET_DATA,
    start_index=TRAIN_SPLIT, end_index=None,
    history_size=past_history, target_size=future_target,
    step=STEP, single_step=True)


Let's look at a single data-point.


In [0]:
print ('Single window of past history : {}'.format(x_train_single[0].shape))

In [0]:
# Training pipeline: cache, shuffle, batch, then repeat indefinitely.
train_data_single = (
    tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)

# Validation pipeline: batched and repeated, without shuffling.
val_data_single = (
    tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
    .batch(BATCH_SIZE)
    .repeat()
)

In [0]:
x_train_single.shape[-2:]

In [0]:
# Single-step model: one LSTM layer followed by a one-unit dense output.
single_step_model = tf.keras.models.Sequential()

single_step_model.add(tf.keras.layers.LSTM(64,
                                           input_shape=x_train_single.shape[-2:]))
single_step_model.add(tf.keras.layers.Dense(1))
# Multiplies the output by 1, i.e. leaves it unchanged; kept so the layer
# stack stays the same as before.
single_step_model.add(tf.keras.layers.Lambda(lambda x: x * 1))
# Learning-rate sweep: starts at 1e-8 and grows tenfold every 20 epochs. When
# passed to fit() as a callback it sets the optimizer's learning rate per epoch.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))
# `learning_rate` replaces the deprecated `lr` argument name.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)
#single_step_model.compile(optimizer='adam', loss='mae')
single_step_model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

In [0]:
#single_step_model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=5,
#                       strides=1, padding="causal",
#                       activation="relu",
#                      input_shape=x_train_single.shape[-2:])),
#
#single_step_model.add(tf.keras.layers.LSTM(32, return_sequences=True))
#single_step_model.add(tf.keras.layers.LSTM(16, return_sequences=True, activation='relu'))

In [0]:
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
single_step_model.summary()

In [0]:
# Sanity check: run the model (not yet fitted at this point in the notebook)
# on one validation batch and print the shape of its output.
for x, y in val_data_single.take(1):
  print(single_step_model.predict(x).shape)

In [0]:
 # `y` is the label batch left over from the loop in the previous cell; this
 # prints its first label.
 print(y[0].numpy())
 #print(x[0][:, 0].numpy())

In [0]:
# Train for EPOCHS epochs of EVALUATION_INTERVAL steps each, validating on 30
# batches per epoch. The lr_schedule callback sets the learning rate for every
# epoch. EPOCHS is reused by the multi-step training cell further down.
EPOCHS = 100
single_step_history = single_step_model.fit(train_data_single, epochs=EPOCHS,
                                            steps_per_epoch=EVALUATION_INTERVAL,
                                            validation_data=val_data_single,
                                            validation_steps=30, callbacks=[lr_schedule])

In [0]:
def plot_train_history(history, title):
  """Plot training and validation loss per epoch on a new figure.

  Args:
    history: object whose `.history` dict holds 'loss' and 'val_loss' lists
      (as returned by `model.fit`).
    title: title shown above the plot.
  """
  train_loss, val_loss = (history.history[key] for key in ('loss', 'val_loss'))
  epoch_axis = range(len(train_loss))

  plt.figure()
  curves = (
      (train_loss, 'b', 'Training loss'),
      (val_loss, 'r', 'Validation loss'),
  )
  for values, colour, legend_label in curves:
    plt.plot(epoch_axis, values, colour, label=legend_label)
  plt.title(title)
  plt.legend()

  plt.show()

In [0]:
plot_train_history(single_step_history,
                   'Single Step Training and validation loss')

In [0]:
# Training loss against the learning rate recorded for each epoch, with a
# log-scaled x axis.
plt.semilogx(single_step_history.history["lr"], single_step_history.history["loss"])
# Alternative axis limits, currently disabled:
#plt.axis([1e-8, 1e-2, 0, 1]) 
# rescaled to euros
#plt.axis([1e-100, 1, 0, 150])


#### Predict a single step future
Now that the model is trained, let's make a few sample predictions. The model is given the history of all seven selected features over the past 360 observations (`past_history`); since the goal is to predict `sales_brot`, the plot only displays the past values of that column. The true value and the prediction are plotted at time step 1, to the right of the history.

In [0]:
def create_time_steps(length):
  """Return the negative time steps `-length, ..., -1` as a list."""
  return [step for step in range(-length, 0)]

In [0]:
def show_plot(plot_data, delta, title):
  """Plot a history window with its true future value and model prediction.

  Args:
    plot_data: sequence whose first item is the history array; each following
      item is a value drawn as a marker at the future time step.
    delta: x position of the future markers; a falsy value means 0.
    title: plot title.

  Returns:
    The `plt` module, so the caller can call `.show()` on the result.
  """
  series_labels = ['History', 'True Future', 'Model Prediction']
  series_styles = ['.-', 'rx', 'go']
  time_steps = create_time_steps(plot_data[0].shape[0])
  future = delta or 0

  plt.title(title)
  # The history is drawn as a line over the past time steps.
  plt.plot(time_steps, plot_data[0].flatten(), series_styles[0],
           label=series_labels[0])
  # Every remaining series is a single marker at the future time step.
  for idx in range(1, len(plot_data)):
    plt.plot(future, plot_data[idx], series_styles[idx], markersize=10,
             label=series_labels[idx])
  plt.legend()
  plt.xlim([time_steps[0], (future+5)*2])
  plt.xlabel('Time-Step')
  return plt

In [0]:
# For three validation batches, plot the first sample: the history of feature
# column 0, its true label and the model's prediction at time step 1.
for x, y in val_data_single.take(3):
  plot = show_plot([x[0][:, 0].numpy(), y[0].numpy(),
                    single_step_model.predict(x)[0]], 1,
                   'Single Step Prediction')
  # Zoom in: x from -30 to 10, y from 20 to 300.
  plt.axis([-30, 10, 20, 300])
  plot.show()

In [0]:
#true future
print('True Future Value : {}'.format(y[0]))
#prediction
print('Predicted Value : {}'.format(single_step_model.predict(x)[0]))


**Multi-Step Model**

In [0]:
# Multi-step setup: each label is now a slice of `future_target` target values
# (single_step is left at its default of False).
future_target = 7

x_train_multi, y_train_multi = multivariate_data(
    dataset, dataset[:, 0],
    start_index=0, end_index=TRAIN_SPLIT,
    history_size=past_history, target_size=future_target, step=STEP)
x_val_multi, y_val_multi = multivariate_data(
    dataset, dataset[:, 0],
    start_index=TRAIN_SPLIT, end_index=None,
    history_size=past_history, target_size=future_target, step=STEP)

In [0]:
x_train_multi

In [0]:
print ('Single window of past history : {}'.format(x_train_multi[0].shape))
print ('\n Target Sales to predict : {}'.format(y_train_multi[0].shape))

In [0]:
# Training pipeline: cache, shuffle, batch, then repeat indefinitely.
train_data_multi = (
    tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)

# Validation pipeline: batched and repeated, without shuffling.
val_data_multi = (
    tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
    .batch(BATCH_SIZE)
    .repeat()
)

In [0]:
def multi_step_plot(history, true_future, prediction):
  """Plot a history window, the true future values and optional predictions.

  Args:
    history: 2-D window of past observations; only column 0 is plotted.
    true_future: sequence of true future values.
    prediction: array of predicted values; it is drawn only when
      `prediction.any()` is truthy, so an all-zero array skips the plot.
  """
  plt.figure(figsize=(12, 6))
  past_steps = create_time_steps(len(history))
  # Future x positions, scaled by the module-level sampling STEP.
  future_steps = np.arange(len(true_future)) / STEP

  plt.plot(past_steps, np.array(history[:, 0]), label='History')
  plt.plot(future_steps, np.array(true_future), 'bo', label='True Future')
  if prediction.any():
    plt.plot(future_steps, np.array(prediction), 'ro',
             label='Predicted Future')
  plt.legend(loc='upper left')
  plt.show()

In [0]:
# Plot the first sample of one training batch. np.array([0]) is a placeholder:
# its .any() is False, so multi_step_plot draws no prediction series.
for x, y in train_data_multi.take(1):
  multi_step_plot(x[0], y[0], np.array([0]))

In [0]:
# Multi-step model: two stacked LSTM layers followed by a dense output with
# one unit per predicted future step.
multi_step_model = tf.keras.models.Sequential()
multi_step_model.add(tf.keras.layers.LSTM(32,
                                          return_sequences=True,
                                          input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
# The output width must match the label length, so it is tied to
# future_target (7 in the windowing cell) instead of a hard-coded 7.
multi_step_model.add(tf.keras.layers.Dense(future_target))

# clipvalue=1.0 clips each gradient value to [-1, 1].
multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0), loss='mae')

In [0]:
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
multi_step_model.summary()

In [0]:
# Sanity check: run the model (not yet fitted at this point in the notebook)
# on one validation batch and print the shape of its output.
for x, y in val_data_multi.take(1):
  print (multi_step_model.predict(x).shape)

In [0]:
# Train for EPOCHS epochs of EVALUATION_INTERVAL steps each, validating on 10
# batches per epoch. No learning-rate callback is passed here.
multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                          steps_per_epoch=EVALUATION_INTERVAL,
                                          validation_data=val_data_multi,
                                          validation_steps=10)

In [0]:
# plot_train_history() already calls plt.show(). The former trailing
# " plot.show()" line was removed: its stray indent is a syntax error and
# `plot` was only defined by the single-step plotting loop.
plot_train_history(multi_step_history, 'Multi-Step Training and validation loss')
# plt.axis([-1,1, 20, 100])

#### Predict a multi-step future


In [0]:
# For three validation batches, plot the first sample's history, true future
# values and the model's predicted values.
for x, y in val_data_multi.take(3):
  multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
   




In [0]:
#true future
print('True Future Values : {}'.format(y[0]))
#prediction
print('Predicted Values : {}'.format(multi_step_model.predict(x)[0]))
