<a href="https://colab.research.google.com/github/Bhandari007/Timeseries-and-sequence-models/blob/main/C4_W2_Lab1_features_and_labels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preparing Time Series Features and Labels

In this lab, we will prepare time series data into features and labels that we can use to train a model. This is mainly achieved by a *windowing* technique in which we group consecutive measurement values into one feature, and the next measurement becomes the label. For example, with hourly measurements, we can use the values taken at hours 1 to 11 to predict the value at hour 12.

# Imports

In [1]:
import tensorflow as tf

# Create a Simple Dataset

In [2]:
# Build a dataset holding the integers 0 through 9
dataset = tf.data.Dataset.range(10)

# Walk the dataset as NumPy values and print one per line
for val in dataset.as_numpy_iterator():
    print(val)

0
1
2
3
4
5
6
7
8
9


# Windowing the data

In [7]:
# Start from the integers 0-9 and slide a 5-element window over them,
# advancing one element at a time; each window is itself a nested dataset
dataset = tf.data.Dataset.range(10).window(size=5, shift=1)

# Printing a window directly shows the dataset object, not its values
for window_dataset in dataset:
    print(window_dataset)

<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>
<_VariantDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>


In [8]:
# Materialize the values of every window and print them as a list
for window_dataset in dataset:
    print(list(window_dataset.as_numpy_iterator()))

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]
[6, 7, 8, 9]
[7, 8, 9]
[8, 9]
[9]


In [10]:
# Same 0-9 source as before
dataset = tf.data.Dataset.range(10)

# Keep only the windows that contain the full 5 elements
dataset = dataset.window(
    size=5,
    shift=1,
    drop_remainder=True,
)

# Show the values held by each remaining window
for window_dataset in dataset:
    print([item.numpy() for item in window_dataset])

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]


# Flatten the Windows

In [11]:
dataset = (
    # Source data: the integers 0 through 9
    tf.data.Dataset.range(10)
    # Full-size 5-element windows only, shifted by one element each time
    .window(size=5, shift=1, drop_remainder=True)
    # Turn each nested window dataset into a single 5-element tensor
    .flat_map(lambda win: win.batch(5))
)

# Each element is now one flattened window
for window in dataset.as_numpy_iterator():
    print(window)

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


# Group into features and labels

In [12]:
# Everything except the last value of a window becomes the features;
# the last value becomes the label
dataset = dataset.map(lambda win: (win[:-1], win[-1]))

# Show each (features, label) pair
for x, y in dataset:
    print(f"x = {x.numpy()}")
    print(f"y = {y.numpy()}")

x = [0 1 2 3]
y = 4
x = [1 2 3 4]
y = 5
x = [2 3 4 5]
y = 6
x = [3 4 5 6]
y = 7
x = [4 5 6 7]
y = 8
x = [5 6 7 8]
y = 9


# Shuffle the data

In [15]:
# Rebuild the whole pipeline from scratch and shuffle the resulting examples
dataset = (
    tf.data.Dataset.range(10)
    # Full-size 5-element windows, shifted by one element each time
    .window(5, shift=1, drop_remainder=True)
    # Flatten each nested window into a single tensor
    .flat_map(lambda win: win.batch(5))
    # Split into (features, label): first four values vs. the last value
    .map(lambda win: (win[:-1], win[-1]))
    # Shuffle the (features, label) pairs
    .shuffle(buffer_size=10)
)

# Show the pairs in their shuffled order
for x, y in dataset:
    print(f"x = {x.numpy()}")
    print(f"y = {y.numpy()}")

x = [2 3 4 5]
y = 6
x = [3 4 5 6]
y = 7
x = [4 5 6 7]
y = 8
x = [1 2 3 4]
y = 5
x = [5 6 7 8]
y = 9
x = [0 1 2 3]
y = 4


# Create batches for training

In [16]:
# Group the (features, label) pairs two at a time
dataset = dataset.batch(2)
# Prefetch with a buffer of one element (here, one batch)
dataset = dataset.prefetch(1)

# Print the stacked features followed by the labels of every batch
for x, y in dataset.as_numpy_iterator():
    print(x)
    print(y)

[[5 6 7 8]
 [3 4 5 6]]
[9 7]
[[2 3 4 5]
 [1 2 3 4]]
[6 5]
[[0 1 2 3]
 [4 5 6 7]]
[4 8]
