In [58]:
import tensorflow as tf


### From a NumPy array or a Python list

In [2]:
# Slice a plain Python list: every list item becomes one scalar dataset element.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
print(*dataset, sep="\n")

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)


In [60]:
import numpy as np

# A 1-D NumPy array is sliced the same way as a list: one scalar element
# per array entry.
a = np.array([1, 2, 3, 4])
dataset = tf.data.Dataset.from_tensor_slices(a)
print(*dataset, sep="\n")

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)


### From the lines of text files

In [10]:
# Read both text files line by line; each line is yielded as a byte string.
# NOTE(review): the recorded output shows the first line prefixed with
# b'\xef\xbb\xbf' (a UTF-8 byte-order mark), so 1.txt was presumably saved
# with a BOM and it is passed through unchanged -- strip it if it matters.
dataset = tf.data.TextLineDataset(filenames=["1.txt", "2.txt"])
for element in dataset:
    print(element)

tf.Tensor(b'\xef\xbb\xbfsalam', shape=(), dtype=string)
tf.Tensor(b'hi', shape=(), dtype=string)
tf.Tensor(b'bye', shape=(), dtype=string)
tf.Tensor(b'100', shape=(), dtype=string)
tf.Tensor(b'200', shape=(), dtype=string)
tf.Tensor(b'500', shape=(), dtype=string)


### From TFRecord files

In [21]:
# Build a dataset over the records stored in two TFRecord files.
dataset = tf.data.TFRecordDataset(
    filenames=["file1.tfrecords", "file2.tfrecords"],
)

In [105]:
# Sample TFRecord file from the TensorFlow test data:
# https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001
# NOTE(review): assumes that file was downloaded locally as "fsns.tfrec" -- confirm.
dataset = tf.data.TFRecordDataset(["fsns.tfrec"])

In [108]:
# Take the first serialized record from the dataset.
record_iterator = iter(dataset)
raw_example = next(record_iterator)

# Decode the record's bytes into a tf.train.Example protocol buffer.
serialized = raw_example.numpy()
parsed = tf.train.Example()
parsed.ParseFromString(serialized)

# Display the 'image/text' feature of the decoded example.
parsed.features.feature['image/text']


bytes_list {
  value: "Rue Perreyon"
}

### From all files matching a glob pattern

In [23]:
# Each element is the path of one file matching the glob pattern.
# The recorded output lists 2.txt before 1.txt: list_files shuffles the
# file names by default (pass shuffle=False for a deterministic order).
dataset = tf.data.Dataset.list_files(file_pattern="*.txt")
for element in dataset:
    print(element)

tf.Tensor(b'.\\2.txt', shape=(), dtype=string)
tf.Tensor(b'.\\1.txt', shape=(), dtype=string)


### From Python generator

Creates a `Dataset` whose elements are generated by `generator`.

The `generator` argument must be a callable object that returns an object that supports the `iter()` protocol (e.g. a generator function). The elements generated by `generator` must be compatible with the given `output_types` and (optional) `output_shapes` arguments.

In [47]:
def fib(n):
    """Yield the first ``n`` Fibonacci numbers, starting from 0.

    Args:
        n: How many values to produce; it is passed to ``range``.

    Yields:
        The Fibonacci numbers 0, 1, 1, 2, 3, ... one at a time.
    """
    current, following = 0, 1
    for _ in range(n):
        yield current
        # Advance the pair by one position in the sequence.
        total = current + following
        current = following
        following = total

In [51]:
# Sanity-check the plain Python generator: print its first four values,
# one per line.
print(*fib(4), sep="\n")

0
1
1
2


In [55]:
# Wrap the `fib` generator function in a dataset of scalar int32 elements;
# `args` supplies the arguments that the generator is called with (n = 8).
# NOTE(review): newer TensorFlow releases document `output_signature` as the
# replacement for `output_types` / `output_shapes` -- confirm against the
# installed version before switching.
dataset = tf.data.Dataset.from_generator(
    generator=fib,
    output_types=tf.int32,
    output_shapes=(),
    args=[8],
)
print(*dataset, sep="\n")

tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(13, shape=(), dtype=int32)
