# Tensorflow Data set tutorial - intoduction to most usefull concepts taken from tensor flow webpage tutorial

In [1]:
import tensorflow as tf

# printing type and shape of components in the data set elements

In [4]:
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random_uniform([4, 10]))
print(dataset1.output_types)  # ==> "tf.float32"
print(dataset1.output_shapes)  # ==> "(10,)"

dataset2 = tf.data.Dataset.from_tensor_slices(
   (tf.random_uniform([4]),
    tf.random_uniform([4, 100], maxval=100, dtype=tf.int32)))
print(dataset2.output_types)  # ==> "(tf.float32, tf.int32)"
print(dataset2.output_shapes)  # ==> "((), (100,))"

dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
print(dataset3.output_types)  # ==> (tf.float32, (tf.float32, tf.int32))
print(dataset3.output_shapes)  # ==> "(10, ((), (100,)))"

<dtype: 'float32'>
(10,)
(tf.float32, tf.int32)
(TensorShape([]), TensorShape([Dimension(100)]))
(tf.float32, (tf.float32, tf.int32))
(TensorShape([Dimension(10)]), (TensorShape([]), TensorShape([Dimension(100)])))


# naming components

In [5]:
dataset = tf.data.Dataset.from_tensor_slices(
   {"a": tf.random_uniform([4]),
    "b": tf.random_uniform([4, 100], maxval=100, dtype=tf.int32)})
print(dataset.output_types)  # ==> "{'a': tf.float32, 'b': tf.int32}"
print(dataset.output_shapes)  # ==> "{'a': (), 'b': (100,)}"

{'a': tf.float32, 'b': tf.int32}
{'a': TensorShape([]), 'b': TensorShape([Dimension(100)])}


# accessing data through an iterator

In [10]:
dataset = tf.data.Dataset.range(100)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next() # simplest iterator

with tf.Session() as sess:    
    for i in range(100):
        value = sess.run(next_element)
        assert i == value

In [12]:
max_value = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(max_value)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:    
    # Initialize an iterator over a dataset with 10 elements.
    sess.run(iterator.initializer, feed_dict={max_value: 10})
    for i in range(10):
        value = sess.run(next_element)
        assert i == value

    # Initialize the same iterator over a dataset with 100 elements.
    sess.run(iterator.initializer, feed_dict={max_value: 100})
    for i in range(100):
        value = sess.run(next_element)
        assert i == value

# reinitializable iterator for different data sets

In [15]:
# Define training and validation datasets with the same structure.
training_dataset = tf.data.Dataset.range(100).map(
    lambda x: x + tf.random_uniform([], -10, 10, tf.int64))
validation_dataset = tf.data.Dataset.range(50)

# A reinitializable iterator is defined by its structure. We could use the
# `output_types` and `output_shapes` properties of either `training_dataset`
# or `validation_dataset` here, because they are compatible.
iterator = tf.data.Iterator.from_structure(training_dataset.output_types,
                                           training_dataset.output_shapes)
next_element = iterator.get_next()

training_init_op = iterator.make_initializer(training_dataset)
validation_init_op = iterator.make_initializer(validation_dataset)

with tf.Session() as sess:
    # Run 20 epochs in which the training dataset is traversed, followed by the
    # validation dataset.
    for _ in range(20):
        # Initialize an iterator over the training dataset.
        sess.run(training_init_op)
        for _ in range(100):
            sess.run(next_element)

        # Initialize an iterator over the validation dataset.
        sess.run(validation_init_op)
        for _ in range(50):
            sess.run(next_element)

# feedable iterator

In [88]:
# Define training and validation datasets with the same structure.
training_dataset = tf.data.Dataset.range(100).map(
    lambda x: x + tf.random_uniform([], -10, 10, tf.int64)).repeat()
validation_dataset = tf.data.Dataset.range(50)

# A feedable iterator is defined by a handle placeholder and its structure. We
# could use the `output_types` and `output_shapes` properties of either
# `training_dataset` or `validation_dataset` here, because they have
# identical structure.
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, training_dataset.output_types, training_dataset.output_shapes)
next_element = iterator.get_next()

# You can use feedable iterators with a variety of different kinds of iterator
# (such as one-shot and initializable iterators).
training_iterator = training_dataset.make_one_shot_iterator()
validation_iterator = validation_dataset.make_initializable_iterator()


with tf.Session() as sess:
    # The `Iterator.string_handle()` method returns a tensor that can be evaluated
    # and used to feed the `handle` placeholder.
    training_handle = sess.run(training_iterator.string_handle())
    validation_handle = sess.run(validation_iterator.string_handle())

    # Loop forever, alternating between training and validation.
    while True:
      # Run 200 steps using the training dataset. Note that the training dataset is
      # infinite, and we resume from where we left off in the previous `while` loop
      # iteration.
        for _ in range(200):
            sess.run(next_element, feed_dict={handle: training_handle})

      # Run one pass over the validation dataset.
        sess.run(validation_iterator.initializer)
        for _ in range(50):
            sess.run(next_element, feed_dict={handle: validation_handle})

KeyboardInterrupt: 

# consuming values from an iterator

In [94]:
dataset = tf.data.Dataset.range(10)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

# Typically `result` will be the output of a model, or an optimizer's
# training operation.
result = tf.add(next_element, next_element)
with tf.Session() as sess:
    sess.run(iterator.initializer)
    print(sess.run(result))  # ==> "0"
    print(sess.run(result))  # ==> "2"
    print(sess.run(result))  # ==> "4"
    print(sess.run(result))  # ==> "6"
    print(sess.run(result))  # ==> "8"
    try:
        sess.run(result)
    except tf.errors.OutOfRangeError:
        print("End of dataset")  # ==> "End of dataset"

0
2
4
6
8


In [96]:
with tf.Session() as sess:
    sess.run(iterator.initializer)
    while True:
        try:
            x = sess.run(result)
            print(x)
        except tf.errors.OutOfRangeError:
            break

0
2
4
6
8
10
12
14
16
18


In [107]:
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random_uniform([4, 10]))
dataset2 = tf.data.Dataset.from_tensor_slices((tf.random_uniform([4]), tf.random_uniform([4, 100])))
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))

iterator = dataset3.make_initializable_iterator()
next1, (next2, next3) = iterator.get_next()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    while True:
        try:
            x = sess.run((next2, next3))
            #print(x)
        except tf.errors.OutOfRangeError:
            break

# Reading input data

In [None]:
import gzip
with gzip.open('train-images-idx3-ubyte.gz', 'rb') as f:
    file_content = f.read()

In [128]:
image_string = tf.read_file('bird.jpg')
image_decoded = tf.image.decode_image(image_string)
image_resized = tf.image.resize_image_with_crop_or_pad(image_decoded, 280, 280)


with tf.Session() as sess:
    result = sess.run(image_resized)
    print(result)
    print(result.shape)

[[[230 226 201]
  [227 223 196]
  [229 225 198]
  ...
  [ 94 139 172]
  [ 94 139 172]
  [ 95 138 170]]

 [[230 226 199]
  [227 223 196]
  [231 227 200]
  ...
  [ 91 136 167]
  [ 92 135 167]
  [ 91 135 164]]

 [[233 227 201]
  [232 226 200]
  [235 229 203]
  ...
  [ 90 134 163]
  [ 90 134 163]
  [ 89 133 162]]

 ...

 [[112 155 189]
  [108 151 183]
  [107 150 182]
  ...
  [ 83 122 127]
  [ 81 121 123]
  [ 80 120 120]]

 [[117 159 197]
  [119 162 197]
  [112 155 190]
  ...
  [ 90 128 141]
  [ 87 125 136]
  [ 86 124 133]]

 [[119 161 201]
  [120 164 203]
  [117 159 197]
  ...
  [ 92 130 149]
  [ 89 127 146]
  [ 88 126 145]]]
(280, 280, 3)


numpy.ndarray