# Sequence Preprocessing for Training

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Print the installed TensorFlow version so the recorded outputs can be tied to it.
print(tf.__version__)

2.2.0


In [2]:
# Build a dataset that yields the integers 0 through 9, one scalar per element.
dataset = tf.data.Dataset.range(10)

# Each element is a scalar tensor; .numpy() unwraps it to a plain value for printing.
for element in dataset:
  print(element.numpy())

0
1
2
3
4
5
6
7
8
9


In [3]:
# Slide a window of size 5 over the integers 0..9, advancing one element per step.
# Note: 5 is the window *size*, not the number of windows. Because drop_remainder
# is not set, the windows that start near the end of the range hold fewer than
# 5 elements.
dataset = tf.data.Dataset.range(10).window(5, shift=1)

# Every window is itself a small dataset, so it gets its own inner loop.
for window_ds in dataset:
  for item in window_ds:
    print(item.numpy(), end=" ")
  print()

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
6 7 8 9 
7 8 9 
8 9 
9 


In [4]:
# Slide a window of size 5 (shift 1) over the integers 0..9.
# drop_remainder=True discards the trailing windows that would contain fewer
# than 5 elements, so every remaining window has the same length.
dataset = tf.data.Dataset.range(10).window(5, shift=1, drop_remainder=True)

# Every window is itself a small dataset, so it gets its own inner loop.
for window_ds in dataset:
  for item in window_ds:
    print(item.numpy(), end=" ")
  print()

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


In [5]:
# Pipeline: integers 0..9 -> full-length sliding windows of size 5 (shift 1)
# -> each window flattened into a single 1-D tensor.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    # A window is a nested dataset; batch(5) packs its 5 elements into one
    # tensor, and flat_map flattens the result back into a flat dataset.
    .flat_map(lambda w: w.batch(5))
)

for window_tensor in dataset:
  print(window_tensor.numpy())

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


In [6]:
# Pipeline: integers 0..9 -> full-length sliding windows of size 5 (shift 1)
# -> one 1-D tensor per window -> (features, label) pairs.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    # A window is a nested dataset; batch(5) packs its 5 elements into one tensor.
    .flat_map(lambda w: w.batch(5))
    # Features: everything except the last element. Label: the last element,
    # sliced with [-1:] so it stays a length-1 tensor instead of a scalar.
    .map(lambda w: (w[:-1], w[-1:]))
)

for features, label in dataset:
  print(features.numpy(), label.numpy())

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


In [7]:
# Pipeline: integers 0..9 -> full-length sliding windows of size 5 (shift 1)
# -> one 1-D tensor per window -> (features, label) pairs -> shuffled order.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    # A window is a nested dataset; batch(5) packs its 5 elements into one tensor.
    .flat_map(lambda w: w.batch(5))
    # Features: everything except the last element. Label: the last element,
    # sliced with [-1:] so it stays a length-1 tensor instead of a scalar.
    .map(lambda w: (w[:-1], w[-1:]))
    # Randomise the order of the (features, label) pairs. No seed is passed,
    # so the printed order differs from run to run.
    .shuffle(buffer_size=10)
)

for features, label in dataset:
  print(features.numpy(), label.numpy())

[4 5 6 7] [8]
[0 1 2 3] [4]
[3 4 5 6] [7]
[1 2 3 4] [5]
[2 3 4 5] [6]
[5 6 7 8] [9]


In [8]:
# Pipeline: integers 0..9 -> full-length sliding windows of size 5 (shift 1)
# -> one 1-D tensor per window -> (features, label) pairs -> shuffled order
# -> mini-batches of 2 pairs.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    # A window is a nested dataset; batch(5) packs its 5 elements into one tensor.
    .flat_map(lambda w: w.batch(5))
    # Features: everything except the last element. Label: the last element,
    # sliced with [-1:] so it stays a length-1 tensor instead of a scalar.
    .map(lambda w: (w[:-1], w[-1:]))
    # Randomise the order of the (features, label) pairs. No seed is passed,
    # so the printed order differs from run to run.
    .shuffle(buffer_size=10)
    # Stack pairs two at a time, adding a leading batch dimension to both
    # the features and the labels.
    .batch(2)
    # Keep one batch prepared ahead of the consumer.
    .prefetch(1)
)

for features, labels in dataset:
  print("x = ", features.numpy())
  print("y = ", labels.numpy())

x =  [[0 1 2 3]
 [1 2 3 4]]
y =  [[4]
 [5]]
x =  [[2 3 4 5]
 [5 6 7 8]]
y =  [[6]
 [9]]
x =  [[4 5 6 7]
 [3 4 5 6]]
y =  [[8]
 [7]]
