# Introduction

This notebook contains examples for Time Series Forecasting, and includes: 
1. Common Patterns
2. Naive Forecasting
3. Moving Average
4. Time Windows


# Common Patterns
Based on [TF Examples](https://colab.research.google.com/github/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l08c01_common_patterns.ipynb)

In [None]:
# setup
import numpy as np
import matplotlib.pyplot as plt

def plot_series(time, series, format="-", start=0, end=None, label=None):
    """Draw ``series[start:end]`` against ``time[start:end]`` on the current figure.

    ``format`` is passed to ``plt.plot`` as the line-style string. The axes are
    labelled "Time" / "Value" and a grid is enabled on every call; a legend is
    added only when ``label`` is truthy.
    """
    window = slice(start, end)
    plt.plot(time[window], series[window], format, label=label)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)
    if label:
        plt.legend(fontsize=14)

In [None]:
# trend and seasonality
def trend(time, slope=0):
    """Return a linear trend: every value of ``time`` scaled by ``slope``."""
    linear = slope * time
    return linear

In [None]:
# example of a time series that just trends upwards
# 4 * 365 + 1 steps, consistent with the 365-step period used by the later cells
time = np.arange(4 * 365 + 1)
baseline = 10
# Add the trend to the scalar offset. `baseline` must stay a scalar because
# later cells reuse it as the constant offset of the series.
series = baseline + trend(time, 0.1)

plt.figure(figsize=(10,6))
plot_series(time, series)
plt.show()

In [None]:
# Evaluate both arrays for inspection.
# NOTE(review): a notebook cell only echoes its last bare expression, so
# presumably only `series` is displayed here - confirm that is intended.
time
series

In [None]:
# generate a time series with a seasonal pattern

def seasonal_pattern(season_time):
    """Return the value of an arbitrary seasonal shape at ``season_time``.

    Positions below 0.6 follow one cosine cycle (``cos(2 * pi * t)``); positions
    at or above 0.6 follow the decay ``1 / exp(3 * t)``. Both branches are
    evaluated for every element and ``np.where`` selects between them.
    """
    cosine_part = np.cos(season_time * 2 * np.pi)
    decay_part = 1 / np.exp(3 * season_time)
    in_cosine_region = season_time < 0.6
    return np.where(in_cosine_region, cosine_part, decay_part)
  
def seasonality(time, period, amplitude=1, phase=0):
    """Repeat ``seasonal_pattern`` every ``period`` time steps.

    Args:
        time: time step(s) at which to evaluate the pattern.
        period: length of one season, in the same units as ``time``.
        amplitude: factor applied to the pattern's output.
        phase: offset added to ``time`` before wrapping into a season.

    Returns:
        ``amplitude * seasonal_pattern(position)``, where ``position`` is the
        fractional position of each time step within its season.
    """
    # Modulo (not bitwise AND) wraps the shifted time into one period; dividing
    # by the period turns it into a fractional position within the season.
    season_time = ((time + phase) % period) / period
    return amplitude * seasonal_pattern(season_time)

In [None]:
# Build a purely seasonal series: seasonality() evaluated over `time` with
# period=365, scaled by `amplitude`. Both names are reused by later cells.
amplitude = 40
series = seasonality(time, period=365, amplitude = amplitude)

# Plot the series on a 10x6 figure.
plt.figure(figsize=(10,6))
plot_series(time, series)
plt.show()

In [None]:
# Create a time series with both trend and seasonality: the sum of `baseline`,
# a linear trend with the given slope, and the seasonal component.
slope = 0.05
series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)

plt.figure(figsize=(10,6))
plot_series(time, series)
plt.show()

Generate white noise: in practice, few real-life time series are free of noise.

In [None]:
def white_noise(time, noise_level=1, seed=None):
    """Return one standard-normal sample per element of ``time``, scaled by ``noise_level``.

    A dedicated ``np.random.RandomState(seed)`` is created on each call, so a
    fixed ``seed`` reproduces the same samples and the global NumPy random
    state is not used.
    """
    sample_count = len(time)
    generator = np.random.RandomState(seed)
    return noise_level * generator.randn(sample_count)

In [None]:
# Draw one noise sample per time step, scaled by `noise_level`; the fixed seed
# makes the samples reproducible.
noise_level = 5
noise = white_noise(time, noise_level, seed=42)

# Plot the noise on its own.
plt.figure(figsize=(10,6))
plot_series(time, noise)
plt.show()

In [None]:
# Add the white noise to the time series.
# `+=` modifies `series` in place, so re-running this cell without rebuilding
# `series` first adds the noise a second time.
series += noise
plt.figure(figsize=(10,6))
plot_series(time, series)
plt.show()

# Naive Forecasting

In [None]:
# Uses the functions `plot_series`, `trend`, `seasonal_pattern`, `seasonality`,
# and `white_noise` defined above.

# Rebuild the series from scratch with fresh values so this section does not
# depend on the state left behind by the earlier cells.
time = np.arange(4 * 365 + 1)
slope = 0.05
baseline = 10
amplitude = 40
series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)

# Seeded noise, added in place to the series.
noise_level = 5
noise = white_noise(time, noise_level, seed=42)

series += noise

plt.figure(figsize=(10,6))
plot_series(time, series)
plt.show()


In [None]:
# Split the series into the training and validation periods: the first
# `split_time` points are used for training, the remaining ones for validation.
split_time = 1000
time_train = time[:split_time]
x_train = series[:split_time]
time_valid = time[split_time:]
x_valid = series[split_time:]

In [None]:
# Naive forecast: predict each validation step with the value one step earlier.
# The slice is the validation slice shifted back by one, so it has the same
# length as `x_valid`.
naive_forecast = series[split_time - 1:-1]

In [None]:
# Overlay the naive forecast on the actual validation series.
plt.figure(figsize=(10,6))
plot_series(time_valid, x_valid, label="Series")
plot_series(time_valid, naive_forecast, label="Forecast")

In [None]:
# Zoom in on where the validation period starts.
# The series is drawn for validation indices 0..149 and the forecast for
# indices 1..150, both against their own time values.
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid, start=0, end=150, label="Series")
plot_series(time_valid, naive_forecast, start=1, end=151, label="Forecast")

In [None]:
# Compute the mean absolute error (MAE) between the forecasts and the actual
# values in the validation period.

# This score serves as the baseline that the other models are compared against.
errors = naive_forecast - x_valid
abs_errors = np.abs(errors)
mae = abs_errors.mean()
mae

# Moving Average
Based on [TF Example](https://colab.research.google.com/github/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l08c03_moving_average.ipynb)

In [None]:
import tensorflow as tf
keras = tf.keras

In [None]:
# MAE of the naive forecast again, this time via the Keras metric helper;
# `.numpy()` converts the resulting tensor to a NumPy value.
keras.metrics.mean_absolute_error(x_valid, naive_forecast).numpy()

In [None]:
# uses functions `plot_series`, `trend`, `seasonal_pattern`, `seasonality`,
# and `white_noise` from above
# basically uses the whole naive forecasting section

In [None]:
def moving_average_forecast(series, window_size):
    """Forecast each step as the mean of the ``window_size`` values before it.

    Element ``i`` of the result is the mean of ``series[i:i + window_size]``,
    so the result has ``len(series) - window_size`` entries. With
    ``window_size=1`` this is equivalent to the naive forecast.
    """
    window_count = len(series) - window_size
    return np.array([
        series[begin:begin + window_size].mean()
        for begin in range(window_count)
    ])

In [None]:
def moving_average_forecast(series, window_size):
    """Forecast each step as the mean of the ``window_size`` values before it.

    Vectorised version based on a cumulative sum: each window sum is the
    difference between two running totals, so no Python-level loop is needed.
    With ``window_size=1`` this is equivalent to the naive forecast.
    """
    running_total = np.cumsum(series)
    window_sums = running_total.copy()
    # From index `window_size` onward, subtract the running total from
    # `window_size` steps earlier to get the sum over that window only.
    window_sums[window_size:] = (
        running_total[window_size:] - running_total[:-window_size]
    )
    # Drop the incomplete leading windows and the final window, which has no
    # following value to forecast.
    return window_sums[window_size - 1:-1] / window_size

In [None]:
# 30-step moving average over the whole series. Forecast index i corresponds to
# time step i + 30, so dropping the first `split_time - 30` entries keeps only
# the forecasts for the validation period.
moving_avg = moving_average_forecast(series, 30)[split_time - 30:]

plt.figure(figsize=(10,6))
plot_series(time_valid, x_valid, label="Series")
plot_series(time_valid, moving_avg, label="Moving average (30 days)")

In [None]:
# MAE of the 30-step moving-average forecast over the validation period.
keras.metrics.mean_absolute_error(x_valid, moving_avg).numpy()

This is worse than naive forecasting! The moving average doesn't anticipate trend or seasonality, so when the series changes quickly the forecast lags behind (delay).

Let's try to remove the trend and seasonality through differencing: subtract the value from 365 steps earlier, i.e. series(t) - series(t - 365).

In [None]:
# Seasonal differencing: subtract from each value the value 365 steps earlier.
# The first 365 steps have no earlier counterpart, so `diff_time` starts at 365.
diff_series = (series[365:] - series[:-365])
diff_time = time[365:]

plt.figure(figsize=(10,6))
# Label fixed: the closing parenthesis was missing.
plot_series(diff_time, diff_series, label="Series(t) - Series(t-365)")
plt.show()

In [None]:
# Zoom in on the validation period.
# `diff_series` index j corresponds to time step j + 365, so the validation
# period starts at index `split_time - 365`.
plt.figure(figsize=(10,6))
plot_series(time_valid, diff_series[split_time - 365:], label="Series(t) - Series(t-365)")
plt.show()

In [None]:
# Apply a 50-step moving average to the differenced series and plot it.
# The slice offset combines the 365-step differencing shift and the 50-step
# window, so the remaining forecasts line up with the validation period.
diff_moving_avg = moving_average_forecast(diff_series, 50)[split_time - 365 - 50:]

plt.figure(figsize=(10, 6))
plot_series(time_valid, diff_series[split_time - 365:], label="Series(t) – Series(t–365)")
plot_series(time_valid, diff_moving_avg, label="Moving Average of Diff")
plt.show()

In [None]:
# Bring trend and seasonality back by adding the past values from t - 365.
# `series[split_time - 365:-365]` is the validation slice shifted back by 365
# steps, so it has the same length as `diff_moving_avg`.
diff_moving_avg_plus_past = series[split_time - 365:-365] + diff_moving_avg

plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid, label="Series")
plot_series(time_valid, diff_moving_avg_plus_past, label="Forecasts")
plt.show()

In [None]:
# MAE of the forecast built from the differenced moving average plus the raw
# values from 365 steps earlier.
keras.metrics.mean_absolute_error(x_valid, diff_moving_avg_plus_past).numpy()

In [None]:
# Instead of using the noisy past values themselves, smooth them with a moving
# average to remove some of the noise.
# With an 11-step window over series[split_time - 370:-359], the value added for
# time t is the mean of series[t - 370 .. t - 360], i.e. a window centred on
# t - 365.
diff_moving_avg_plus_smooth_past = moving_average_forecast(series[split_time - 370:-359], 11) + diff_moving_avg
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid, label="Series")
plot_series(time_valid, diff_moving_avg_plus_smooth_past, label="Forecasts")
plt.show()

In [None]:
# MAE of the forecast built from the differenced moving average plus the
# smoothed past values.
keras.metrics.mean_absolute_error(x_valid, diff_moving_avg_plus_smooth_past).numpy()

# Time Windows

Based on: [Link to TF Example](https://colab.research.google.com/github/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l08c04_time_windows.ipynb)

In [None]:
# Dataset of the integers 0..9; print each element as a NumPy value.
dataset = tf.data.Dataset.range(10)
for val in dataset:
  print(val.numpy())

In [None]:
# Original (uncleaned) windowing: windows of up to 5 elements, each starting
# one element after the previous one.
# NOTE(review): without drop_remainder the trailing windows are presumably
# shorter than 5 - confirm against the printed output.
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5, shift=1)
# Each window is itself a dataset; print its elements on one line.
for window_dataset in dataset:
    for val in window_dataset:
        print(val.numpy(), end=" ")
    print()

In [None]:
# Cleaned dataset: full-length windows only, turned into (input, target) pairs.
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5, shift=1, drop_remainder=True)
# Flatten each window (a nested dataset) into a single 5-element tensor.
dataset = dataset.flat_map(lambda window: window.batch(5))
# Split each window into the first four values (x) and the last value (y).
dataset = dataset.map(lambda window: (window[:-1], window[-1:]))
dataset = dataset.shuffle(buffer_size=10)
# Group the pairs into batches of two and prefetch one batch ahead.
dataset = dataset.batch(2).prefetch(1)

for x, y in dataset:
  print("x=", x.numpy())
  print("y=", y.numpy())


In [None]:
# final dataset function
def window_dataset(series, window_size, batch_size=32,
                   shuffle_buffer=1000):
    """Build a shuffled, batched dataset of (window, next value) pairs from ``series``.

    Each example takes ``window_size + 1`` consecutive values: the first
    ``window_size`` form the input and the last one is the target. Windows
    advance one step at a time and incomplete trailing windows are dropped.
    Examples are shuffled with a buffer of ``shuffle_buffer`` elements, grouped
    into batches of ``batch_size``, and prefetched one batch ahead.
    """
    span = window_size + 1  # input values plus the single target value
    return (
        tf.data.Dataset.from_tensor_slices(series)
        .window(span, shift=1, drop_remainder=True)
        .flat_map(lambda chunk: chunk.batch(span))
        .shuffle(shuffle_buffer)
        .map(lambda chunk: (chunk[:-1], chunk[-1]))
        .batch(batch_size)
        .prefetch(1)
    )