In [1]:
import tensorflow as tf

In [16]:
# Source dataset: yields the integers 0 through 9, one scalar element at a time.
dataset = tf.data.Dataset.range(10)

# Bare trailing expression so the notebook shows the dataset's repr.
dataset

<RangeDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>

# create a simple dataset

In [17]:
# Walk the dataset and print every element as a plain NumPy value
# (as_numpy_iterator() does the tensor -> NumPy conversion for us).
for value in dataset.as_numpy_iterator():
    print(value)

0
1
2
3
4
5
6
7
8
9


# windowing the data

In [18]:
# Rebuild the source dataset inside this cell so the cell is idempotent.
# Reassigning `dataset = dataset.window(...)` on top of a dataset left over
# from an earlier cell means a second run would call window() on the
# already-windowed dataset instead of on the raw range.
dataset = tf.data.Dataset.range(10)

# Each window is itself a small Dataset holding up to 5 consecutive elements,
# and consecutive windows start one element apart (shift=1).
# drop_remainder is left at its default here, so the windows near the end of
# the range contain fewer than 5 elements.
dataset = dataset.window(size=5, shift=1)

# A window is a nested Dataset, so iterate it to get at the individual values.
for window in dataset:
    print([element.numpy() for element in window])

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]
[6, 7, 8, 9]
[7, 8, 9]
[8, 9]
[9]


# flatten the window
When you train a model later, each window needs to be a tensor rather than a nested Dataset. You can get that by passing a mapping function to the `flat_map()` method: the function is applied to each window and the results are flattened into a single dataset. To illustrate, the code below batches all the elements of each window into a single tensor and then flattens the result. It also passes `drop_remainder=True` to `window()` so that the shorter windows at the end of the range are discarded and every window has exactly 5 elements.

In [42]:
# Build the whole pipeline as one chain:
#   range -> full-length windows of 5 -> one 5-element tensor per window.
dataset = (
    tf.data.Dataset.range(10)
    # drop_remainder=True keeps only windows that contain all 5 elements.
    .window(5, shift=1, drop_remainder=True)
    # Batching a window by its own size turns it into a single tensor;
    # flat_map then flattens those per-window results into one dataset.
    .flat_map(lambda w: w.batch(5))
)

for window_tensor in dataset:
    print([value.numpy() for value in window_tensor])

[0, 1, 2, 3, 4]
[1, 2, 3, 4, 5]
[2, 3, 4, 5, 6]
[3, 4, 5, 6, 7]
[4, 5, 6, 7, 8]
[5, 6, 7, 8, 9]


# group into features and labels

In [39]:
# Same windowing pipeline as before, followed by a split of every window into
# (features, label): all values but the last are the features, the last value
# is the label.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1]))
)

# Print each feature vector followed by its label.
for features, label in dataset:
    print(features.numpy())
    print(label.numpy())

[0 1 2 3]
4
[1 2 3 4]
5
[2 3 4 5]
6
[3 4 5 6]
7
[4 5 6 7]
8
[5 6 7 8]
9


# shuffle the data

In [44]:
# Windowed (features, label) pairs, as in the previous step.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1]))
    # Shuffle the (features, label) pairs. No seed is passed, so the order
    # printed below is expected to differ from run to run.
    .shuffle(buffer_size=10)
)

# Print each feature vector followed by its label, in shuffled order.
for features, label in dataset:
    print(features.numpy())
    print(label.numpy())

[4 5 6 7]
8
[1 2 3 4]
5
[5 6 7 8]
9
[2 3 4 5]
6
[3 4 5 6]
7
[0 1 2 3]
4


# create batches for training

In [48]:
# Full input pipeline: window -> flatten -> split -> shuffle -> batch.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1]))
    # Unseeded shuffle: the pairing of examples into batches varies per run.
    .shuffle(buffer_size=10)
    # Group the shuffled examples two at a time.
    .batch(2)
    # Keep one batch prepared ahead of the consumer.
    .prefetch(1)
)

# Each iteration yields a batch of two feature vectors and their two labels.
for features_batch, labels_batch in dataset:
    print(features_batch.numpy())
    print(labels_batch.numpy())

[[2 3 4 5]
 [0 1 2 3]]
[6 4]
[[1 2 3 4]
 [4 5 6 7]]
[5 8]
[[3 4 5 6]
 [5 6 7 8]]
[7 9]
