# Creating Features and Targets from a Self-Made TensorFlow Dataset

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Show the installed TensorFlow version so readers know which release
# produced the outputs recorded in this notebook.
print(tf.__version__)

2.3.0


Creating a TensorFlow Dataset object

In [3]:
# A dataset of the integers 0 through 9, printed one element per line.
dataset = tf.data.Dataset.range(10)
for element in dataset.as_numpy_iterator():
    print(element)

0
1
2
3
4
5
6
7
8
9


Creating a windowed-Dataset out of the earlier Dataset

In [6]:
# Group the range into windows of 5 whose starts are one element apart.
# Each window is itself a small dataset; without drop_remainder the trailing
# windows hold fewer than 5 elements.
dataset = tf.data.Dataset.range(10).window(5, shift=1)
for window_dataset in dataset:
    # One window per line, every value followed by a single space.
    print("".join(f"{val.numpy()} " for val in window_dataset))

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
6 7 8 9 
7 8 9 
8 9 
9 


Filtering this windowed dataset and keeping only the windows with a consistent length

In [7]:
# Same windowing as before, but drop_remainder=True discards the windows
# that would contain fewer than 5 elements.
dataset = tf.data.Dataset.range(10).window(5, shift=1, drop_remainder=True)
for window in dataset:
    # Emit each value with a trailing space, then end the line.
    print(*(f"{val.numpy()} " for val in window), sep="")

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


Breaking this data into segments to be used as `m` training examples (here `m` == 6)

In [9]:
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    # Batching each 5-element window by 5 collapses it into a single tensor,
    # and flat_map flattens the results into one dataset of those tensors.
    .flat_map(lambda win: win.batch(5))
)
for row in dataset.as_numpy_iterator():
    print(row)

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


Breaking the dataset further to extract features and targets out of them

In [10]:
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
    # All but the last value form the features; the last value is the target
    # (sliced with [-1:] so it stays a 1-element tensor).
    .map(lambda seq: (seq[:-1], seq[-1:]))
)
for features, target in dataset.as_numpy_iterator():
    print(features, target)

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


Now shuffling the dataset

In [11]:
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
    .map(lambda seq: (seq[:-1], seq[-1:]))
    # Shuffle with a buffer of 10. No seed is passed, so the printed order
    # will generally differ from one run to the next.
    .shuffle(10)
)
for example in dataset:
    # Each example is a (features, target) pair of tensors.
    print(*(part.numpy() for part in example))


[0 1 2 3] [4]
[3 4 5 6] [7]
[4 5 6 7] [8]
[2 3 4 5] [6]
[5 6 7 8] [9]
[1 2 3 4] [5]


Creating batches out of this dataset

In [23]:
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda win: win.batch(5))
    .map(lambda seq: (seq[:-1], seq[-1:]))
    .shuffle(10)
    # Group the shuffled (features, target) examples two at a time.
    .batch(2)
    .prefetch(1)
)

for x_batch, y_batch in dataset:
    print("X:", x_batch.numpy())
    # end="\n\n" leaves one blank line between consecutive batches.
    print("Y:", y_batch.numpy(), end="\n\n")

X: [[1 2 3 4]
 [5 6 7 8]]
Y: [[5]
 [9]]

X: [[2 3 4 5]
 [0 1 2 3]]
Y: [[6]
 [4]]

X: [[3 4 5 6]
 [4 5 6 7]]
Y: [[7]
 [8]]

