## Exercises on using the `timeseries_dataset_from_array` and `window` functions

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
# Artificial univariate time series: the integers 0 through 99.
data = np.arange(100)

In [None]:
# Exercise 1 (tf.keras.utils.timeseries_dataset_from_array):
# - split the series into 10 non-overlapping windows of 10 values each;
#   the stride equals the window length, so no value is shared
# - batch_size=None leaves the windows unbatched: one window per element
ds1 = tf.keras.utils.timeseries_dataset_from_array(
    data=data,
    sequence_length=10,
    sequence_stride=10,
    targets=None,
    batch_size=None,
)

In [None]:
# Show every window of ds1 with its shape, followed by a separator row.
for item in ds1:
    print(f"{item} {item.shape}", "*"*50, sep="\n")

In [None]:
# Same exercise using `window`, starting from tf.data.Dataset.range(100).
# `window` yields nested datasets, so flat_map + batch(10) collapses each
# window into a single tensor of 10 values.
# shift is spelled out (it would otherwise default to `size`) and
# drop_remainder=True discards a short trailing window, mirroring
# timeseries_dataset_from_array, which only emits full-length windows.
# For 100 elements the result is unchanged: 10 windows of length 10.
ds1_window = (
    tf.data.Dataset.range(100)
    .window(size=10, shift=10, drop_remainder=True)
    .flat_map(lambda ds: ds.batch(10))
)

# Show every window with its shape, followed by a separator row.
for item in ds1_window:
    print(f"{item} {item.shape}")
    print("*"*50)

In [None]:
# Exercise 2 (tf.keras.utils.timeseries_dataset_from_array):
# - split the series into 10 non-overlapping windows of 10 values each
# - group the windows into batches of three; 10 is not a multiple of 3,
#   so the final batch holds the single leftover window
ds2 = tf.keras.utils.timeseries_dataset_from_array(
    data=data,
    sequence_length=10,
    sequence_stride=10,
    targets=None,
    batch_size=3,
)

In [None]:
# Show every batch of ds2 with its shape, followed by a separator row.
for item in ds2:
    print(f"{item} {item.shape}", "*"*50, sep="\n")

In [None]:
# Same exercise using `window`: 10 non-overlapping windows of length 10,
# grouped into batches of three.
# flat_map + batch(10) turns each nested window dataset into one tensor;
# the trailing batch(3) then stacks three such tensors per element.
# shift is spelled out and drop_remainder=True discards a short trailing
# window, mirroring timeseries_dataset_from_array, which only emits
# full-length windows. For 100 elements the result is unchanged.
ds2_window = (
    tf.data.Dataset.range(100)
    .window(size=10, shift=10, drop_remainder=True)
    .flat_map(lambda ds: ds.batch(10))
    .batch(3)
)

In [None]:
# Show every batch of ds2_window with its shape, then a separator row.
for item in ds2_window:
    print(f"{item} {item.shape}", "*"*50, sep="\n")

In [None]:
# Exercise 3 (tf.keras.utils.timeseries_dataset_from_array):
# - windows of length 10
# - a stride of 5 makes consecutive windows overlap by 5 values
# - group the windows into batches of three
ds3 = tf.keras.utils.timeseries_dataset_from_array(
    data=data,
    sequence_length=10,
    sequence_stride=5,
    targets=None,
    batch_size=3,
)

In [None]:
# Show every batch of ds3 with its shape, followed by a separator row.
for item in ds3:
    print(f"{item} {item.shape}", "*"*50, sep="\n")

In [None]:
# `window` version of exercise 3: length-10 windows that advance by 5
# (so neighbours share 5 values), incomplete windows dropped, each window
# flattened to one tensor and the tensors grouped in batches of three.
ds3_window = (
    tf.data.Dataset.range(100)
    .window(size=10, shift=5, drop_remainder=True)
    .flat_map(lambda win: win.batch(10))
    .batch(3)
)

In [None]:
# Show every batch of ds3_window with its shape, then a separator row.
for item in ds3_window:
    print(f"{item} {item.shape}", "*"*50, sep="\n")

In [None]:
# Exercise 4 (tf.keras.utils.timeseries_dataset_from_array):
# - windows of length 10, overlapping by 5 values (stride 5)
# - each window is paired with the value that immediately follows it
# - no batching
# targets[i] is matched with the window starting at index i, so passing
# data[10:] pairs the window data[i:i+10] with data[i+10].
ds4 = tf.keras.utils.timeseries_dataset_from_array(
    data=data,
    sequence_length=10,
    sequence_stride=5,
    targets=data[10:],
    batch_size=None,
)


In [None]:
# Show each (window, target) pair of ds4, followed by a separator row.
for item, label in ds4:
    print(
        f"{item} maps to {label} and has shape {item.shape}",
        "*"*50,
        sep="\n",
    )

In [None]:
# `window` version of exercise 4: take 11 values at a time (10 inputs plus
# the value that follows them), advancing by 5, then split every flattened
# window into (first 10 values, last value).
ds4_windows = (
    tf.data.Dataset.range(100)
    .window(size=11, shift=5, drop_remainder=True)
    .flat_map(lambda win: win.batch(11))
    .map(lambda values: (values[:-1], values[-1]))
)

In [None]:
# Show each (window, target) pair of ds4_windows, then a separator row.
for item, label in ds4_windows:
    print(
        f"{item} maps to {label} and has shape {item.shape}",
        "*"*50,
        sep="\n",
    )

In [None]:
# Exercise 5: "predict" several values at once.
# Inputs:  windows of length 5
# Targets: the 2 values that follow each input window
#   [0,1,2,3,4] -> [5,6]
#   [1,2,3,4,5] -> [6,7]
# No batching.
# Approach: slide a length-7 window one step at a time, flatten it into a
# single tensor, then cut it into its first 5 and last 2 values.
ds5_windows = (
    tf.data.Dataset.range(100)
    .window(size=7, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(7))
    .map(lambda values: (values[:5], values[-2:]))
)

In [None]:
# Show each (input, target) pair of ds5_windows, then a separator row.
for item, label in ds5_windows:
    print(
        f"{item} maps to {label} and has shape {item.shape}",
        "*"*50,
        sep="\n",
    )

In [None]:
# Exercise 6: the pipeline from the previous exercise (5 inputs -> 2
# targets), extended with shuffling (seed=42) and batches of size 10.
# The shuffle buffer (1000) is larger than the number of windows produced,
# so the whole dataset is shuffled before batching.
ds6_windows = (
    tf.data.Dataset.range(100)
    .window(size=7, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(7))
    .map(lambda values: (values[:5], values[-2:]))
    .shuffle(buffer_size=1000, seed=42)
    .batch(10)
)

In [None]:
# Show each (inputs, targets) batch of ds6_windows, then a separator row.
for item, label in ds6_windows:
    print(
        f"{item} maps to {label} and has shape {item.shape}",
        "*"*50,
        sep="\n",
    )

In [None]:
# Exercise 7: sequence-to-sequence data.
# Inputs:  sequences of length 5
# Targets: sequences of length 5, shifted one step to the right,
#          e.g. input [0,1,2,3,4] -> target [1,2,3,4,5]
# No batching.
# Approach: first build (value, next value) pairs, then slide a window
# over 5 consecutive pairs. Each element is then a 5x2 tensor whose first
# column is the input sequence and whose last column is the target.
ds7_windows = (
    tf.data.Dataset.range(100)
    .window(size=2, shift=1, drop_remainder=True)
    .flat_map(lambda pair: pair.batch(2))
    .window(size=5, shift=1, drop_remainder=True)
    .flat_map(lambda pairs: pairs.batch(5))
    .map(lambda stacked: (stacked[:, 0], stacked[:, -1]))
)

In [None]:
# Show each (input, target) sequence pair of ds7_windows, then a separator.
for item, label in ds7_windows:
    print(
        f"{item} maps to {label} and has shape {item.shape}",
        "*"*50,
        sep="\n",
    )

In [None]:
# Exercise 8: sequence-to-sequence data with multi-step targets.
# Inputs:  sequences of length 5
# Targets: sequences of length 5, where each sequence element consists of
#          the next 3 data points,
#          e.g. input [0,1,2,3,4] ->
#          target [[1,2,3],[2,3,4],[3,4,5],[4,5,6],[5,6,7]]
# No batching.
# Approach: first build rows of 4 consecutive values (1 input value plus
# its 3 successors), then slide a window over 5 consecutive rows. Each
# element is then a 5x4 tensor: column 0 is the input sequence and the
# remaining columns are the targets.
ds8_windows = (
    tf.data.Dataset.range(100)
    .window(size=4, shift=1, drop_remainder=True)  # 4 = 1 input + 3 targets
    .flat_map(lambda row: row.batch(4))
    .window(size=5, shift=1, drop_remainder=True)
    .flat_map(lambda rows: rows.batch(5))
    .map(lambda stacked: (stacked[:, 0], stacked[:, 1:]))
)

In [None]:
# Show each (input, target) sequence pair of ds8_windows, then a separator.
for item, label in ds8_windows:
    print(
        f"{item} maps to {label} and has shape {item.shape}",
        "*"*50,
        sep="\n",
    )