<a href="https://colab.research.google.com/github/Dansah2/Udacity_Tutorials/blob/main/Udacity_TimeWindows.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

In [None]:
# Build a tf.data.Dataset that yields the integers 0 through 9,
# then print every element as a plain NumPy value.
dataset = tf.data.Dataset.range(10)
for element in dataset:
  value = element.numpy()
  print(value)

0
1
2
3
4
5
6
7
8
9


In [None]:
# Slide a window of size 5 over the dataset, moving one element at a time.
# window() yields a dataset of datasets: one window per starting element.
# Without drop_remainder the trailing windows hold fewer than 5 elements.
dataset = tf.data.Dataset.range(10).window(5, shift=1)
for window in dataset:
  digits = [str(item.numpy()) for item in window]
  print(''.join(digits))

01234
12345
23456
34567
45678
56789
6789
789
89
9


In [None]:
# All windows must have the same length.
# To accomplish this, pass drop_remainder=True to window() so that the
# shorter windows at the end of the dataset are discarded.
dataset = tf.data.Dataset.range(10).window(5, shift=1, drop_remainder=True)
for window in dataset:
  digits = [str(item.numpy()) for item in window]
  print(''.join(digits))

01234
12345
23456
34567
45678
56789


In [None]:
# We want a single dataset containing the windows as regular tensors
# rather than a dataset of nested datasets.
# flat_map() does this: each window is batched to its full length (5)
# and the resulting one-tensor datasets are flattened into one dataset.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
)
for tensor in dataset:
  print(tensor.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


In [None]:
# Split every window into two tensors with map():
#   features - the first four elements of the window
#   label    - the last element of the window (kept as a length-1 tensor)

def _split_window(window):
  return window[:-1], window[-1:]

dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(_split_window)
)
for features, label in dataset:
  print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


In [None]:
# Shuffle the windows so they are visited in random order.
# Only the order of the windows changes; the values inside each
# window keep their original sequence.

dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1:]))
    .shuffle(buffer_size=10)
)
for features, label in dataset:
  print(features.numpy(), label.numpy())

[1 2 3 4] [5]
[2 3 4 5] [6]
[4 5 6 7] [8]
[0 1 2 3] [4]
[5 6 7 8] [9]
[3 4 5 6] [7]


In [None]:
# Group the shuffled windows into training batches with batch(); here
# each batch holds two (features, label) pairs.
# prefetch(1) lets the next batch be loaded while the previous batch
# is still being used for training.

dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1:]))
    .shuffle(buffer_size=10)
    .batch(2)
    .prefetch(1)
)
for features, labels in dataset:
  print("x =", features.numpy())
  print("y =", labels.numpy())

x = [[5 6 7 8]
 [0 1 2 3]]
y = [[9]
 [4]]
x = [[3 4 5 6]
 [2 3 4 5]]
y = [[7]
 [6]]
x = [[4 5 6 7]
 [1 2 3 4]]
y = [[8]
 [5]]


In [None]:
# this is the function you would ultimately use to do all of the above steps
# note that instead of using range() you would use from_tensor_slices()

def window_dataset(series, window_size, batch_size=32, shuffle_buffer=1000):
  """Turn a series into shuffled, batched (features, label) windows.

  Args:
    series: Values handed to `tf.data.Dataset.from_tensor_slices`.
    window_size: Number of consecutive values used as the features; the
      value immediately following them becomes the label.
    batch_size: Number of (features, label) pairs grouped into each batch.
    shuffle_buffer: Buffer size passed to `Dataset.shuffle`.

  Returns:
    A `tf.data.Dataset` of batched `(features, label)` pairs with a
    prefetch of one batch.
  """
  # Every window carries the features plus one extra value for the label.
  span = window_size + 1

  def split_features_label(window):
    # Everything except the last value is the input; the last is the target.
    return window[:-1], window[-1]

  windows = tf.data.Dataset.from_tensor_slices(series).window(
      span, shift=1, drop_remainder=True)
  # Each window is itself a dataset; batching it to its full length and
  # flattening gives one regular tensor per window.
  tensors = windows.flat_map(lambda w: w.batch(span))
  shuffled = tensors.shuffle(shuffle_buffer)
  pairs = shuffled.map(split_features_label)
  return pairs.batch(batch_size).prefetch(1)