In [15]:
# ---------------------------------------------------------------------------- #
# Method 1 - Input pipeline with Dataset
# ---------------------------------------------------------------------------- #
# https://www.tensorflow.org/programmers_guide/datasets

import tensorflow as tf

"""
In the example code at the above URL, they have a line in their _parse_function:
image_resized = tf.image.resize_images(image_decoded, [28, 28])
This throws an error because decode_image() returns a tensor without a static
shape: GIFs decode to four dimensions (the extra one is for animation frames)
instead of three. To resize images, use the format-specific decode_jpeg() and
decode_png() methods.
"""
def _parse_function(data_file, label_file):
    """Read one (data, label) pair of image files and decode both of them.

    Meant to be handed to Dataset.map(), which calls it once for every
    (data filename, label filename) element of the dataset.

    Args:
        data_file: path of the input image file.
        label_file: path of the matching label image file.

    Returns:
        A (data, label) tuple holding the two decoded images.
    """
    def _load_image(path):
        # decode_image() detects the image format from the file contents.
        return tf.image.decode_image(tf.read_file(path))

    return _load_image(data_file), _load_image(label_file)
    
"""
Getting filenames of data -
There are two ways of doing this, both are below. The first way creates a
constant tensor by manually hardcoding the names of every data file. This is
not scalable or flexible (it's possible to use os.listdir, but this is messy).
The second way is to use tf.train.match_filenames_once. This is the way to go.
In the documentation for this at:
https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/train/match_filenames_once
it says it returns a variable initialized to the list of filenames, but if you
run this through the rest of the code you'll get the error:
FailedPreconditionError: Attempting to use uninitialized value matching_filenames_9
or some other matching filename. The solution to this is to use the
initialized_value() method for Variables, which returns the variable's
initialized value as a tensor, so it can be used without first running the
variable initializers.
https://stackoverflow.com/questions/36007883/tensorflow-attempting-to-use-uninitialized-value-in-variable-initialization
"""
# Get data
# Hardcoded alternative for the input images:
# train_imgs = tf.constant(['train_data/ADE_train_00012440.jpg',
#                           'train_data/ADE_train_00012557.jpg'])

# Glob the input images, then read the resulting variable through
# initialized_value() so no separate variable-initializer run is needed.
train_data_matches = tf.train.match_filenames_once('train_data/*.jpg')
train_data_filenames = train_data_matches.initialized_value()

# Hardcoded alternative for the label images:
# train_labels = tf.constant(['train_labels/ADE_train_00012440_seg.png',
#                             'train_labels/ADE_train_00012557_seg.png'])

# NOTE(review): data and label filenames are paired by position when the
# dataset is built, so this assumes both globs return their files in
# corresponding order -- confirm.
train_labels_matches = tf.train.match_filenames_once('train_labels/*.png')
train_labels_filenames = train_labels_matches.initialized_value()



"""
Creating TensorFlow Dataset object -
Use the from_tensor_slices() method as shown below. As the documentation 
describes at:
https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices
if the argument is a nested tuple of tensors, the tensors must all have the
same size in the zero dimension. Each element of the dataset will have the same 
nested structure as the argument, but instead of tensors as elements it will have
the element of each tensor in the argument, at the index of the element in the
dataset. For example, if we had
tf.data.Dataset.from_tensor_slices((tensor1, tensor2))
then the first element would be
(tensor1[0], tensor2[0])
the second element would be
(tensor1[1], tensor2[1])
and so on.

The map() method for Dataset objects takes in a function, performs that
function on each element in the dataset, and then returns the transformed 
dataset. For our purposes, we want to transform our dataset of filenames into
a dataset of decoded image files. 
"""
# Create dataset
# Slice the two filename tensors in lockstep so that every element is one
# (data filename, label filename) pair, then decode each pair into images.
filename_pairs = (train_data_filenames, train_labels_filenames)
train_dataset = tf.data.Dataset.from_tensor_slices(filename_pairs).map(_parse_function)

"""
Create TensorFlow Iterator object -
There are two ways to do this, both shown below. There isn't a big difference
between the two of them. I think the better way is to keep with the Dataset 
class method to do it, but under the hood I'm pretty sure it's the same as the 
first method.
"""
# Create iterator
# Alternative: build the iterator from the dataset's types and shapes instead.
# iterator = tf.data.Iterator.from_structure(train_dataset.output_types, 
#                                            train_dataset.output_shapes)
iterator = train_dataset.make_initializable_iterator()

# Evaluating next_element in a session yields the next (data, label) element;
# once the dataset is exhausted it raises tf.errors.OutOfRangeError.
next_element = iterator.get_next()


"""
Initialize stuff -
Normally it's necessary to initialize global and local variables, but in this
example it's not. All that needs to be initialized is the iterator.
"""
# Initialize stuff
# Variant that also initializes global and local variables:
# init = (tf.global_variables_initializer(),
#         tf.local_variables_initializer(),
#         iterator.make_initializer(train_dataset))

# Only the iterator has to be initialized in this example.
init = iterator.make_initializer(train_dataset)

with tf.Session() as sess:

    # Point the iterator at the training data
    sess.run(init)

    # Pull elements one at a time; OutOfRangeError marks the end of the dataset
    dataset_exhausted = False
    while not dataset_exhausted:
        try:
            elem = sess.run(next_element)
        except tf.errors.OutOfRangeError:
            dataset_exhausted = True
            print("End of training dataset.")
        else:
            print(elem)

(array([[[ 35,  34,  32],
        [ 38,  37,  35],
        [ 27,  26,  24],
        ...,
        [166, 188, 209],
        [166, 188, 211],
        [166, 188, 211]],

       [[ 49,  49,  49],
        [ 46,  46,  44],
        [ 31,  31,  31],
        ...,
        [166, 188, 209],
        [166, 188, 211],
        [166, 188, 211]],

       [[ 61,  62,  64],
        [ 52,  54,  53],
        [ 39,  40,  42],
        ...,
        [166, 188, 209],
        [166, 188, 211],
        [166, 188, 211]],

       ...,

       [[  2,   7,   1],
        [  2,   7,   1],
        [  2,   7,   1],
        ...,
        [  5,   6,   1],
        [  6,   7,   2],
        [  7,   8,   3]],

       [[  4,   9,   3],
        [  4,   9,   3],
        [  4,   9,   3],
        ...,
        [  5,   6,   1],
        [  6,   7,   2],
        [  7,   8,   3]],

       [[  5,  10,   4],
        [  5,  10,   4],
        [  6,  11,   5],
        ...,
        [  5,   6,   1],
        [  6,   7,   2],
        [  6,   7,   2]

In [18]:
# ---------------------------------------------------------------------------- #
# Method 2 - Input pipeline with Queue
# ---------------------------------------------------------------------------- #
# https://gist.github.com/eerwitt/518b0c9564e500b4b50f

import tensorflow as tf

"""
See Method 1 for documentation on getting filenames
"""
# Hardcoded alternative to globbing:
# filenames = tf.constant(['train_data/ADE_train_00012440.jpg',
#                           'train_data/ADE_train_00012557.jpg'])

# Collect every training image that matches the glob pattern
train_image_glob = 'train_data/*.jpg'
filenames = tf.train.match_filenames_once(train_image_glob)

# Feed the matched filenames into a TensorFlow queue
# NOTE(review): per the TF docs string_input_producer() shuffles filenames by
# default, so the order in which images are read may vary -- confirm.
filename_queue = tf.train.string_input_producer(filenames)

"""
Create a TensorFlow WholeFileReader object -
The read() method reads the next file from the queue and returns a tuple.
The first element is the filename or 'key', the second element is the string 
of file data or 'value'. For our purposes we don't care about the filename and 
focus on the data. 

The decode_image() method takes a string of file data as an argument and decodes it
into a Dense Tensor based on its format. There are decode_jpeg() and decode_png()
methods as well as specific methods for other image formats. The decode_image() method
is a convenience method that will detect the image format and properly decode it.

"""
# Create reader
# A WholeFileReader hands back the full contents of one file per read().
image_reader = tf.WholeFileReader()

# Read file from queue (we don't use the filename returned)
# read() returns a (key, value) pair: the filename and the raw file contents.
filename, image_file = image_reader.read(filename_queue)

# Decode image into Dense Tensor
# decode_image() detects the image format from the file data itself.
image = tf.image.decode_image(image_file)

"""
Initialize stuff - 
Here we actually do need to initialize both global and local variables
"""
# Initialize stuff
# Both the global and the local variable initializers are required here.
init = (tf.global_variables_initializer(),
        tf.local_variables_initializer())

# Start a new session to show example output.
with tf.Session() as sess:
    # Initialize global and local variables
    sess.run(init)

    # Coordinate the loading of image files.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        # Pull two image tensors from the queue, one per run() call, and print
        # their values. Each run() dequeues and decodes the next file.
        for image_number in range(2):
            image_tensor = sess.run([image])
            print(image_tensor)
    finally:
        # Always stop and join the queue-runner threads, even if one of the
        # run() calls above raised.
        coord.request_stop()
        coord.join(threads)

[array([[[ 80,  91,  51],
        [ 78,  89,  49],
        [ 75,  85,  48],
        ...,
        [247, 253, 253],
        [247, 253, 253],
        [247, 253, 253]],

       [[ 89, 100,  60],
        [ 82,  93,  53],
        [ 74,  84,  47],
        ...,
        [248, 254, 254],
        [248, 254, 254],
        [248, 254, 254]],

       [[ 80,  91,  51],
        [ 79,  90,  50],
        [ 81,  91,  54],
        ...,
        [248, 254, 254],
        [248, 254, 254],
        [248, 254, 254]],

       ...,

       [[ 29,  47,  23],
        [ 26,  46,  21],
        [ 30,  50,  23],
        ...,
        [ 40,  78,   3],
        [ 21,  57,   0],
        [ 48,  84,  10]],

       [[ 30,  50,  25],
        [ 32,  55,  29],
        [ 30,  53,  25],
        ...,
        [ 66, 104,  31],
        [ 36,  72,   0],
        [ 47,  80,   9]],

       [[ 44,  67,  41],
        [ 33,  56,  30],
        [ 11,  36,   7],
        ...,
        [ 49,  87,  14],
        [ 24,  57,   0],
        [ 33,  66,   0]

In [3]:
# Compare the two ways of obtaining filenames: a glob-backed variable versus a
# hardcoded constant tensor.
filenames_match = tf.train.match_filenames_once('train_data/*.jpg')
filenames_const = tf.constant(['train_data/ADE_train_00012440.jpg',
                               'train_data/ADE_train_00012557.jpg'])
filename_tensors = (filenames_match, filenames_const)

init = (tf.global_variables_initializer(), tf.local_variables_initializer())

# First show the symbolic graph objects themselves ...
for filename_tensor in filename_tensors:
    print(filename_tensor)

# ... then their evaluated contents once the variables are initialized.
with tf.Session() as sess:
    sess.run(init)
    for filename_tensor in filename_tensors:
        print(sess.run(filename_tensor))


<tf.Variable 'matching_filenames_1:0' shape=<unknown> dtype=string_ref>
Tensor("Const_2:0", shape=(2,), dtype=string)
[b'train_data/ADE_train_00012440.jpg' b'train_data/ADE_train_00012557.jpg']
[b'train_data/ADE_train_00012440.jpg' b'train_data/ADE_train_00012557.jpg']
