In [1]:
import tensorflow as tf

In [3]:
# Build a dataset of the ten integers 0 through 9; every element is held as a tensor.
dataset = tf.data.Dataset.range(10)
for element in dataset:
    # .numpy() turns the tensor element into its plain value before printing.
    print(element.numpy())

0
1
2
3
4
5
6
7
8
9


In [15]:
# Slice the range into windows of 5 elements, advancing the window start by 1 each time.
# The result is a dataset whose elements are themselves datasets (one per window).
# Without drop_remainder, the windows near the end of the range hold fewer than 5 values.
dataset = tf.data.Dataset.range(10).window(5, shift=1)
for window in dataset:
    for item in window:
        # Keep all values of one window on a single output line.
        print(item.numpy(), end=" ")
    print()

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
6 7 8 9 
7 8 9 
8 9 
9 


In [14]:
# drop_remainder=True discards every window that holds fewer than 5 elements,
# because ML models need input features of the same size.
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5, shift=1, drop_remainder=True)
for window in dataset:
    for item in window:
        # Keep all values of one window on a single output line.
        print(item.numpy(), end=" ")
    print()

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


In [26]:
# Each window is itself a small dataset. Batching it with a batch size equal to the
# window size (5) packs the whole window into one tensor, and flat_map flattens the
# per-window results back into a single dataset of those tensors.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(5))
)
for window_tensor in dataset:
    print(window_tensor.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


In [29]:
# Split every window tensor into input features (all values but the last) and a
# label (the last value, kept as a length-1 slice) to feed to an ML model.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(5))
    .map(lambda window: (window[:-1], window[-1:]))
)
for features, label in dataset:
    print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


In [33]:
# Shuffle the (features, label) pairs. No seed is set, so the printed order
# changes from run to run.
dataset = tf.data.Dataset.range(10)
dataset = dataset.window(5, shift=1, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(5))
dataset = dataset.map(lambda window: (window[:-1], window[-1:]))
# The buffer (10) is larger than the 6 windows produced here.
dataset = dataset.shuffle(buffer_size=10)
for features, label in dataset:
    print(features.numpy(), label.numpy())

[2 3 4 5] [6]
[0 1 2 3] [4]
[1 2 3 4] [5]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


In [37]:
# Finally, group the shuffled (features, label) pairs into batches of 2, because
# ML models are trained on batches. With 6 pairs this yields 3 batches.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda window: window.batch(5))
    .map(lambda window: (window[:-1], window[-1:]))
    .shuffle(buffer_size=10)
    .batch(2)
    .prefetch(1)
)
for features, labels in dataset:
    features_text = "x = {}".format(features.numpy())
    labels_text = "\ny = {}".format(labels.numpy())
    print(features_text, labels_text)

x = [[4 5 6 7]
 [5 6 7 8]] 
y = [[8]
 [9]]
x = [[0 1 2 3]
 [2 3 4 5]] 
y = [[4]
 [6]]
x = [[1 2 3 4]
 [3 4 5 6]] 
y = [[5]
 [7]]


In [39]:
# Reusable version of the pipeline built step by step in the cells above.
def window_dataset(series,window_size,batch_size=32,shuffle_buffer=100):
    """Turn a series into a shuffled, batched dataset of (features, label) pairs.

    The series is cut into overlapping windows of ``window_size + 1`` consecutive
    elements, each shifted by one step from the previous; incomplete windows are
    dropped. In every window the first ``window_size`` elements become the
    features and the final element (kept as a length-1 slice) becomes the label.

    Args:
        series: Values passed to ``tf.data.Dataset.from_tensor_slices``.
        window_size: Number of elements used as input features per example.
        batch_size: Number of (features, label) pairs per batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` of batched ``(features, label)`` pairs, with
        ``prefetch(1)`` applied.
    """
    # One extra element per window: it is split off as the label below.
    span = window_size + 1

    def split_features_label(window):
        # All but the last element are features; the last one is the label.
        return window[:-1], window[-1:]

    return (
        tf.data.Dataset.from_tensor_slices(series)
        .window(span, shift=1, drop_remainder=True)
        # Batch each window dataset by its full length to get one tensor per window.
        .flat_map(lambda window: window.batch(span))
        .map(split_features_label)
        .shuffle(shuffle_buffer)
        .batch(batch_size)
        .prefetch(1)
    )