In [1]:
import tensorflow as tf
import numpy as np

In [2]:
#for slicing error message
import sys, os

In [3]:
x = np.random.sample((100,2))
print("First  5 pairs in x: \n", x[:5], "\n" )
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    print(sess.run(el))

First  5 pairs in x: 
 [[0.28685276 0.63100043]
 [0.64307607 0.34817589]
 [0.72643868 0.86969373]
 [0.49805229 0.5265903 ]
 [0.48830834 0.3464211 ]] 

[0.28685276 0.63100043]


In [4]:
# using two numpy arrays
features, labels = (np.random.sample((100,2)), np.random.sample((100,1)))
print("First  5 features in features: \n", features[:5], "\n" )
print("First  5 labels in labels: \n", labels[:5], "\n" )

dataset = tf.data.Dataset.from_tensor_slices((features,labels))

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    print(sess.run(el))

First  5 features in features: 
 [[0.47763518 0.10118737]
 [0.88673148 0.1753077 ]
 [0.39861486 0.55617885]
 [0.18044922 0.29457104]
 [0.99059056 0.98068558]] 

First  5 labels in labels: 
 [[0.17502057]
 [0.15836931]
 [0.2899051 ]
 [0.58924805]
 [0.39903524]] 

(array([0.47763518, 0.10118737]), array([0.17502057]))


In [5]:
# using a tensor
dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform([100, 2]))
print("dataset shape: ", np.shape(dataset))
print("dataset: ", (dataset))

iter = dataset.make_initializable_iterator()
el = iter.get_next()

with tf.Session() as sess:
    sess.run(iter.initializer)
    print(sess.run(el))

dataset shape:  ()
dataset:  <TensorSliceDataset shapes: (2,), types: tf.float32>
[0.9658345 0.9390756]


In [6]:
# using a placeholder
x = tf.placeholder(tf.float32, shape=[None,2])
dataset = tf.data.Dataset.from_tensor_slices(x)

data = np.random.sample((100,2))
print("First  5 pairs in data: \n", data[:5], "\n" )

iter = dataset.make_initializable_iterator()
el = iter.get_next()

with tf.Session() as sess:
    sess.run(iter.initializer, feed_dict={ x: data })
    print(sess.run(el))

First  5 pairs in data: 
 [[0.98745784 0.76018421]
 [0.23984296 0.52829117]
 [0.36874844 0.58930026]
 [0.79183049 0.03393259]
 [0.69591143 0.67569711]] 

[0.9874578 0.7601842]


In [7]:
# from generator
sequence = np.array([[[1]],[[2],[3]],[[3],[4],[5]]])

def generator():
    for el in sequence:
        yield el

dataset = tf.data.Dataset().batch(1).from_generator(generator,
                                           output_types= tf.int64, 
                                           output_shapes=(tf.TensorShape([None, 1])))

iter = dataset.make_initializable_iterator()
el = iter.get_next()

with tf.Session() as sess:
    sess.run(iter.initializer)
    print(sess.run(el),"\n")
    print(sess.run(el),"\n")
    print(sess.run(el),"\n")


[[1]] 

[[2]
 [3]] 

[[3]
 [4]
 [5]] 



In [8]:
# initializable iterator to switch between data
EPOCHS = 10

x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
dataset = tf.data.Dataset.from_tensor_slices((x, y))

train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.array([[1,2]]), np.array([[0]]))

iter = dataset.make_initializable_iterator()
features, labels = iter.get_next()

with tf.Session() as sess:
#     initialise iterator with train data
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})
    for _ in range(EPOCHS):
        sess.run([features, labels])
#     switch to test data
    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})
    print(sess.run([features, labels]))

    
    

[array([1., 2.], dtype=float32), array([0.], dtype=float32)]


In [9]:
# Reinitializable iterator to switch between Datasets
#EVIDENCE: Can re-read the same data
NO_OF_BATCHES = 10
DATA_ITEMS = 10
NO_OF_EPOCHS = 2
# making fake data using numpy
train_data = (np.random.sample((DATA_ITEMS,2)), np.random.sample((DATA_ITEMS,1)))
test_data = (np.random.sample((10,2)), np.random.sample((10,1)))

# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
test_dataset = tf.data.Dataset.from_tensor_slices(test_data)

# create a iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                           train_dataset.output_shapes)

assert(train_dataset.output_types == test_dataset.output_types)
assert(train_dataset.output_shapes == test_dataset.output_shapes)

features, labels = iter.get_next()

# create the initialisation operations
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)

with tf.Session() as sess:
    for epoch in range(NO_OF_EPOCHS):
        print("Epoch: ", epoch)
        sess.run(train_init_op) # switch to train dataset
        for _ in range(NO_OF_BATCHES):
            print("Train: ",sess.run([features, labels]))
        print()
        
        COMMENT_OUT = True
        if(not COMMENT_OUT):
            sess.run(train_init_op) # switch to train dataset
            for _ in range(NO_OF_BATCHES):
                print("Train: ",sess.run([features, labels]))
            print()
            
        sess.run(test_init_op) # switch to val dataset
        for _ in range(NO_OF_BATCHES):
            print("Test: ",sess.run([features, labels]))

    
    

Epoch:  0
Train:  [array([0.30806043, 0.1002669 ]), array([0.1682616])]
Train:  [array([0.37143655, 0.8316497 ]), array([0.44658544])]
Train:  [array([0.68813665, 0.09992314]), array([0.77531029])]
Train:  [array([0.15658696, 0.50050271]), array([0.06538125])]
Train:  [array([0.89562205, 0.4644704 ]), array([0.22641821])]
Train:  [array([0.8849491 , 0.18134612]), array([0.18742334])]
Train:  [array([0.72141324, 0.22196128]), array([0.74209331])]
Train:  [array([0.53484265, 0.51675819]), array([0.27482371])]
Train:  [array([0.61364882, 0.60404416]), array([0.02574554])]
Train:  [array([0.54130389, 0.62880368]), array([0.840464])]

Test:  [array([0.89432699, 0.51992212]), array([0.83652204])]
Test:  [array([0.76349031, 0.31493326]), array([0.95570562])]
Test:  [array([0.89225831, 0.48527156]), array([0.78004326])]
Test:  [array([0.45998032, 0.66667351]), array([0.93703739])]
Test:  [array([0.61365475, 0.45763402]), array([0.84526684])]
Test:  [array([0.06362408, 0.49845725]), array([0.33

In [10]:
# feedable iterator to switch between iterators
EPOCHS = 10
# making fake data using numpy
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.random.sample((10,2)), np.random.sample((10,1)))
# create placeholder
x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices((x,y))
test_dataset = tf.data.Dataset.from_tensor_slices((x,y))
# create the iterators from the dataset
train_iterator = train_dataset.make_initializable_iterator()
test_iterator = test_dataset.make_initializable_iterator()
# same as in the doc https://www.tensorflow.org/programmers_guide/datasets#creating_an_iterator
handle = tf.placeholder(tf.string, shape=[])
iter = tf.data.Iterator.from_string_handle(
    handle, train_dataset.output_types, train_dataset.output_shapes)
next_elements = iter.get_next()

with tf.Session() as sess:
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    
    # initialise iterators. In our case we could have used the 'one-shot' iterator instead,
    # and directly feed the data insted the Dataset.from_tensor_slices function, but this
    # approach is more general
    sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})
    sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})
    
    for _ in range(EPOCHS):
        x,y = sess.run(next_elements, feed_dict = {handle: train_handle})
        print(x, y)
        
    x,y = sess.run(next_elements, feed_dict = {handle: test_handle})
    print(x,y)

[0.36652708 0.34349123] [0.97863287]
[0.5297945  0.42868015] [0.78588873]
[0.2527375  0.95566773] [0.29431063]
[0.83061224 0.8994813 ] [0.76414174]
[0.4356476  0.96186346] [0.46852523]
[0.6881188  0.90323484] [0.75912625]
[0.8467667  0.25164708] [0.68335015]
[0.6074306  0.80885905] [0.6883512]
[0.565559   0.92677295] [0.1901109]
[0.09625694 0.09132044] [0.18951891]
[0.6515981 0.889733 ] [0.8997203]


In [11]:
# BATCHING
BATCH_SIZE = 4
x = np.random.sample((100,2))
print("First  8 pairs in x: \n", x[:8], "\n" )

# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x).batch(BATCH_SIZE)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    print("First  2 batches (each of 4 pairs) in dataset:")
    print(sess.run(el));    print(sess.run(el))

First  8 pairs in x: 
 [[0.57516571 0.72312181]
 [0.3508486  0.40030373]
 [0.31443465 0.13794457]
 [0.06788421 0.79406055]
 [0.56184218 0.0654742 ]
 [0.27286945 0.46926317]
 [0.58854606 0.29394427]
 [0.16162587 0.73755893]] 

First  2 batches (each of 4 pairs) in dataset:
[[0.57516571 0.72312181]
 [0.3508486  0.40030373]
 [0.31443465 0.13794457]
 [0.06788421 0.79406055]]
[[0.56184218 0.0654742 ]
 [0.27286945 0.46926317]
 [0.58854606 0.29394427]
 [0.16162587 0.73755893]]


In [12]:
# REPEAT
BATCH_SIZE = 4
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
dataset = dataset.repeat() #<<<<<========================
                                                        #^
iter = dataset.make_one_shot_iterator()                 #^
el = iter.get_next()                                    #^
                                                        #^
#with tf.Session() as sess:                              #^
#     this will run forever #==Because of ==========>>>>>>
#    while True:
#        print(sess.run(el))

In [13]:
# MAP
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
print("Type of dataset before map(): ", dataset)

dataset = dataset.map(lambda x: x*2)
print("Type of dataset after map(): ", dataset)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
#     this will run forever.  
#RM: NOT correct. There is no dataset = dataset.repeat() here.
        for i in range(len(x)):
            try:
                print("i: ", i, ". x[i]: ", sess.run(el))
                print("i: ", i, ", x[i]: ", sess.run(el))
                print("i: ", i, ", x[i]: ", sess.run(el))
            except:
                """
                from:
                https://stackoverflow.com/questions/1278705/python-when-i-catch-an-exception-how-do-i-get-the-type-file-and-line-number
                """
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print("i: ", i, "; Exception thrown as iterator is at the end. \n\tException Type: ", exc_type,\
                      "; \n\tIn File: ", fname, "; ", \
                      "\n\tAt Line No: ",exc_tb.tb_lineno)      

Type of dataset before map():  <TensorSliceDataset shapes: (1,), types: tf.int64>
Type of dataset after map():  <MapDataset shapes: (1,), types: tf.int64>
i:  0 . x[i]:  [2]
i:  0 , x[i]:  [4]
i:  0 , x[i]:  [6]
i:  1 . x[i]:  [8]
i:  1 ; Exception thrown as iterator is at the end. 
	Exception Type:  <class 'tensorflow.python.framework.errors_impl.OutOfRangeError'> ; 
	In File:  <ipython-input-13-b14a58f43f2b> ;  
	At Line No:  19
i:  2 ; Exception thrown as iterator is at the end. 
	Exception Type:  <class 'tensorflow.python.framework.errors_impl.OutOfRangeError'> ; 
	In File:  <ipython-input-13-b14a58f43f2b> ;  
	At Line No:  18
i:  3 ; Exception thrown as iterator is at the end. 
	Exception Type:  <class 'tensorflow.python.framework.errors_impl.OutOfRangeError'> ; 
	In File:  <ipython-input-13-b14a58f43f2b> ;  
	At Line No:  18


In [14]:
# SHUFFLE
BATCH_SIZE = 4
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
dataset = dataset.shuffle(buffer_size=100)
dataset = dataset.batch(BATCH_SIZE)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    try:
        print(sess.run(el), "\n")
        print(sess.run(el), "\n")
    except:
        """
        from:
        https://stackoverflow.com/questions/1278705/python-when-i-catch-an-exception-how-do-i-get-the-type-file-and-line-number
        """
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print("Exception thrown as iterator is at the end. \n\tException Type: ", exc_type,\
              "; \n\tIn File: ", fname, "; ", \
              "\n\tAt Line No: ",exc_tb.tb_lineno)  
        
#With new session there should not be any problem        
with tf.Session() as sess:
    print("But With new session there should not be any problem.\n", sess.run(el), "\n")


[[1]
 [4]
 [3]
 [2]] 

Exception thrown as iterator is at the end. 
	Exception Type:  <class 'tensorflow.python.framework.errors_impl.OutOfRangeError'> ; 
	In File:  <ipython-input-14-915945608dd8> ;  
	At Line No:  15
But With new session there should not be any problem.
 [[2]
 [1]
 [4]
 [3]] 



In [15]:
# how to pass the value to a model
EPOCHS = 10
BATCH_SIZE = 16
# using two numpy arrays
features, labels = (np.array([np.random.sample((100,2))]), 
                    np.array([np.random.sample((100,1))]))

dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)

iter = dataset.make_one_shot_iterator()
x, y = iter.get_next()

# make a simple model
net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)

loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label
train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(EPOCHS):
        _, loss_value = sess.run([train_op, loss])
        print("Iter: {}, Loss: {:.4f}".format(i, loss_value))

Iter: 0, Loss: 0.1149
Iter: 1, Loss: 0.1109
Iter: 2, Loss: 0.1075
Iter: 3, Loss: 0.1045
Iter: 4, Loss: 0.1021
Iter: 5, Loss: 0.1001
Iter: 6, Loss: 0.0985
Iter: 7, Loss: 0.0973
Iter: 8, Loss: 0.0965
Iter: 9, Loss: 0.0959


In [19]:
# Wrapping all together -> Switch between train and test set using Initializable iterator
NO_OF_EPOCHS = 10
######################
#RM
NoOfTrainingSamples = 100
NoOfTestSamples=20
INPUT_SHAPE = [None, 2] #Input data. 2-D Array of features
OUTPUT_SHAPE = [None, 1] #Output Classes. 1-D array of Labels
BATCH_SIZE = 10
#######################
# create a placeholder to dynamically switch between batch sizes
batch_size = tf.placeholder(tf.int64)

x, y = tf.placeholder(tf.float32, shape=INPUT_SHAPE), \
                tf.placeholder(tf.float32, shape=OUTPUT_SHAPE)
dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).repeat()

iter = dataset.make_initializable_iterator()
features, labels = iter.get_next()
############################
# make a simple model
net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)

loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label
train_op = tf.train.AdamOptimizer().minimize(loss)

No_Of_Batches = NoOfTrainingSamples//BATCH_SIZE

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # using two numpy arrays
    train_data = (np.random.sample((NoOfTrainingSamples,INPUT_SHAPE[1])), \
                  np.random.sample((NoOfTrainingSamples,OUTPUT_SHAPE[1])))
    test_data = (np.random.sample((NoOfTestSamples,INPUT_SHAPE[1])), \
                 np.random.sample((NoOfTestSamples,OUTPUT_SHAPE[1])))

    # initialise iterator with train data
    #See https://www.tensorflow.org/api_docs/python/tf/data/Dataset#make_initializable_iterator
    # The initializer property returns the tf.operation that is run. It will initialize the iterator.
    # See https://www.tensorflow.org/api_docs/python/tf/data/Iterator#initializer
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    ListOfFeatureLabelPairs = sess.run(iter.get_next())
    print(ListOfFeatureLabelPairs)
    print("List Of features: ", ListOfFeatureLabelPairs[0])
    print("List of labels: ", ListOfFeatureLabelPairs[1])
    
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # using two numpy arrays
    train_data = (np.random.sample((NoOfTrainingSamples,INPUT_SHAPE[1])), \
                  np.random.sample((NoOfTrainingSamples,OUTPUT_SHAPE[1])))
    test_data = (np.random.sample((NoOfTestSamples,INPUT_SHAPE[1])), \
                 np.random.sample((NoOfTestSamples,OUTPUT_SHAPE[1])))

    # initialise iterator with train data
    #See https://www.tensorflow.org/api_docs/python/tf/data/Dataset#make_initializable_iterator
    # The initializer property returns the tf.operation that is run. It will initialize the iterator.
    # See https://www.tensorflow.org/api_docs/python/tf/data/Iterator#initializer
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    print('Training...')
    for epoch in range(NO_OF_EPOCHS):
        tot_loss = 0
        for _ in range(No_Of_Batches + 1):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Epoch No: {}, Loss: {:.4f}".format(epoch, tot_loss / No_Of_Batches))
        
    # initialise iterator with test data
    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})
    print('Test Loss: {:4f}'.format(sess.run(loss)))

    ##########################################################
    # Repeat above to prove that the iterator does not stall when it comes to the end
    # initialise iterator with train data
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    print('Training Again...')
    for epoch in range(NO_OF_EPOCHS):
        tot_loss = 0
        for _ in range(No_Of_Batches):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Epoch No: {}, Loss: {:.4f}".format(epoch, tot_loss / No_Of_Batches))
        
    # initialise iterator with test data
    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})
    print('Test Loss: {:4f}'.format(sess.run(loss)))

    ##########################################################
    # Repeat yet again to prove that the iterator does not stall when it comes to the end
    # initialise iterator with train data
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    print('Training yet Again...')
    for epoch in range(NO_OF_EPOCHS):
        tot_loss = 0
        for _ in range(No_Of_Batches):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Epoch No: {}, Loss: {:.4f}".format(epoch, tot_loss / No_Of_Batches))
        
    # initialise iterator with test data
    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})
    print('Test Loss: {:4f}'.format(sess.run(loss)))

    ##########################################################
    # Repeat yet again to prove that the iterator does not stall when it comes to the end
    # initialise iterator with train data
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    print('Training yet Again...')
    for epoch in range(NO_OF_EPOCHS):
        tot_loss = 0
        for _ in range(No_Of_Batches):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Epoch No: {}, Loss: {:.4f}".format(epoch, tot_loss / No_Of_Batches))
        
    # initialise iterator with test data
    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})
    print('Test Loss: {:4f}'.format(sess.run(loss)))


(array([[0.24807023, 0.17148587],
       [0.86233073, 0.65456855],
       [0.00860412, 0.78815776],
       [0.46045172, 0.71933   ],
       [0.09725407, 0.17776896],
       [0.27247015, 0.5730647 ],
       [0.9112755 , 0.96162033],
       [0.4934017 , 0.9538497 ],
       [0.75804895, 0.41793817],
       [0.74809986, 0.826399  ]], dtype=float32), array([[0.32958052],
       [0.6368118 ],
       [0.02543208],
       [0.56506634],
       [0.00580711],
       [0.0143747 ],
       [0.75209737],
       [0.44360676],
       [0.49445188],
       [0.492034  ]], dtype=float32))
List Of features:  [[0.24807023 0.17148587]
 [0.86233073 0.65456855]
 [0.00860412 0.78815776]
 [0.46045172 0.71933   ]
 [0.09725407 0.17776896]
 [0.27247015 0.5730647 ]
 [0.9112755  0.96162033]
 [0.4934017  0.9538497 ]
 [0.75804895 0.41793817]
 [0.74809986 0.826399  ]]
List of labels:  [[0.32958052]
 [0.6368118 ]
 [0.02543208]
 [0.56506634]
 [0.00580711]
 [0.0143747 ]
 [0.75209737]
 [0.44360676]
 [0.49445188]
 [0.492034  

In [None]:
# Wrapping all together -> Switch between train and test set using Reinitializable iterator
EPOCHS = 10
######################
#RM
NoOfTrainingSamples = 100
NoOfTestSamples=20
#######################
# create a placeholder to dynamically switch between batch sizes
batch_size = tf.placeholder(tf.int64)

x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size).repeat()
test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size) # always batch even 
                                                                #if you want to one shot it
# using two numpy arrays
train_data = (np.random.sample((NoOfTrainingSamples,2)), np.random.sample((NoOfTrainingSamples,1)))
test_data = (np.random.sample((NoOfTestSamples,2)), np.random.sample((NoOfTestSamples,1)))

# create a iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                           train_dataset.output_shapes)
features, labels = iter.get_next()

# create the initialisation operations
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)

# make a simple model
net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)

loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label
train_op = tf.train.AdamOptimizer().minimize(loss)

n_batches = int(NoOfTrainingSamples/BATCH_SIZE)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # initialise iterator with train data
    sess.run(train_init_op, feed_dict = {x : train_data[0], y: train_data[1], batch_size: 16})
    print('Training...')
    for i in range(EPOCHS):
        tot_loss = 0
        for _ in range(n_batches):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Iter: {}, Loss: {:.4f}".format(i, tot_loss / n_batches))
    # initialise iterator with test data
    sess.run(test_init_op, feed_dict = {x : test_data[0], y: test_data[1], batch_size:len(test_data[0])})
    print('Test Loss: {:4f}'.format(sess.run(loss)))


In [None]:
# load a csv
CSV_PATH = './tweets.csv'
dataset = tf.contrib.data.make_csv_dataset(CSV_PATH, batch_size=32, shuffle=False) 
#RM: Original code default value of shuffle (True) was used. With shuffle set to False we read the CSV
#file, row-by-row.

iter = dataset.make_one_shot_iterator()
next = iter.get_next()
#print("iter.get_next(): ", next, "\n") # next is a dict with key=columns names and value=column data

#################################################
#
inputs, labels = next['text'], next['sentiment']
#################################################

with  tf.Session() as sess:
    print(sess.run([inputs,labels]), "\n")
    print(sess.run([inputs,labels]), "\n")

In [None]:
log_time = {}
# copied form https://medium.com/pythonhive/python-decorator-to-measure-the-execution-time-of-methods-fa04cb6bb36d
def how_much(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__)
            kw['log_time'][name] = (te - ts)
            
        return result
    return timed

In [None]:
# benchmark
import time
DATA_SIZE = 5000
DATA_SHAPE = ((32,32),(20,))
BATCH_SIZE = 64 
N_BATCHES = DATA_SIZE // BATCH_SIZE
EPOCHS = 10

test_size = (DATA_SIZE//100)*20 

DUMMY = -1

train_shape = (DATA_SHAPE)
print("DATA_SHAPE:",train_shape)
train_shape = (DATA_SHAPE[0])
print("DATA_SHAPE[0]:",train_shape)
train_shape = ((DUMMY, *DATA_SHAPE))
print("DUMMY, *DATA_SHAPE:",train_shape)
train_shape = ((DUMMY, (*DATA_SHAPE))[0])
print("DUMMY, (*DATA_SHAPE))[0]:",train_shape)
train_shape = ((DUMMY, (*DATA_SHAPE)))
print("DUMMY, (*DATA_SHAPE):",train_shape, "\n")


train_shape = ((DATA_SIZE, *DATA_SHAPE[0]),(DATA_SIZE, *DATA_SHAPE[1]))
test_shape = ((test_size, *DATA_SHAPE[0]),(test_size, *DATA_SHAPE[1]))
print(train_shape, test_shape)

In [None]:
train_data = (np.random.sample(train_shape[0]), np.random.sample(train_shape[1]))
test_data = (np.random.sample(test_shape[0]), np.random.sample(test_shape[1])) 

# used to keep track of the methodds
log_time = {}

tf.reset_default_graph()
#sess = tf.InteractiveSession() #This needs to be closed below. Replaced it with "with tf.Session() as sess" in 
                                #each of the functions 

input_shape = [None, *DATA_SHAPE[0]] # [None, 64, 64, 3]
output_shape = [None,*DATA_SHAPE[1]] # [None, 20]
print("input_shape: {} output_shape: {}".format(input_shape, output_shape))

x, y = tf.placeholder(tf.float32, shape=input_shape), tf.placeholder(tf.float32, shape=output_shape)

@how_much
def one_shot(**kwargs):
    print('\none_shot:')
    
    #The datasets - both train and test - are loaded
    #Two data sets. Two iterators. One for each
    #
    #From: https://www.tensorflow.org/api_docs/python/tf/data/Dataset#make_one_shot_iterator
    #Note: The returned iterator will be ***initialized automatically***.
    #A "one-shot" iterator ***does not*** currently support re-initialization.
    #
    train_dataset = tf.data.Dataset.from_tensor_slices(train_data).batch(BATCH_SIZE).repeat()
    train_iter = train_dataset.make_one_shot_iterator()
    train_element = train_iter.get_next()

    test_dataset = tf.data.Dataset.from_tensor_slices(test_data).batch(BATCH_SIZE).repeat()
    test_iter = test_dataset.make_one_shot_iterator()
    test_element = test_iter.get_next()

    with tf.Session() as sess:
        for epoch_no in range(EPOCHS):
            print(epoch_no, end="")
            for batch_no in range(N_BATCHES):
                element = sess.run(train_element)
                training_data_batch = element[0]
                if(0 == epoch_no):
                    assert(BATCH_SIZE == np.size(training_data_batch, 0))
                else:
                    if((epoch_no - 1) == batch_no):
                        assert((DATA_SIZE % BATCH_SIZE) == np.size(training_data_batch, 0))
                        #N_BATCHES (given by DATA_SIZE // BATCH_SIZE) is 78.
                        #78 batches are read every epoch. That leaves 8 slices at the end of the epoch_no 0
                        #Epoch 1 batch_no 0 reads only 8 when the iterator cycles back. In the remaining 77, 
                        #(77 * BATCH_SIZE) = 4928 slices would be read. Leaving (5000 - 4928) 72 slices. 
                        #So Epoch 2 will read 64 slices in batch_no 0 and then 8 slices in batch_no 1 before 
                        #the iterator goes back. So we have a batch of 8 slices rippling forward with each epoch
                        #if (2 == epoch_no):
                    else:
                        assert(BATCH_SIZE == np.size(training_data_batch,0))

            for _ in range(N_BATCHES):
                sess.run(test_element)

@how_much
def initialisable(**kwargs):
    print('\ninitialisable:')
    
    #x and y are placeholders. They have to be loaded. The types and shapes are known
    #not the values.
    #The same placeholder is used both for train and for test data.
    #Hence the graph needs only one dataset node and one iterator
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(BATCH_SIZE).repeat()

    iter = dataset.make_initializable_iterator()
    iter_init = iter.initializer
    elements = iter.get_next()

    with tf.Session() as sess:
        for i in range(EPOCHS):
            print(i, end="")
            #Initialize to run dataset node that has been loaded with training data
            sess.run(iter_init, feed_dict={ x: train_data[0], y: train_data[1]})
            for _ in range(N_BATCHES):
                sess.run(elements)
                
            #Re-initialize to run the ***same dataset node*** *** but with test data ***    
            sess.run(iter_init, feed_dict={ x: test_data[0], y: test_data[1]})
            for _ in range(N_BATCHES):
                sess.run(elements)

@how_much            
def reinitializable(**kwargs):
    print('\nreinitializable:')
    # create two datasets, one for training and one for test
    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    
    # create ***an*** iterator of the correct shape and type
    iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)
    elements = iter.get_next()
    
    # create the nodes to initialize the iterators to iterate over the train_dataset and the test_dataset
    # See https://www.tensorflow.org/api_docs/python/tf/data/Iterator#make_initializer.
    #The same iterator node (iter) created above is re-initialized
    train_init_op = iter.make_initializer(train_dataset)
    test_init_op = iter.make_initializer(test_dataset)

    with tf.Session() as sess:
        for i in range(EPOCHS):
            print(i, end="")
            #Load the placeholders with train data and train label
            #Iniitialize the iterator to ***iterate over training data***
            sess.run(train_init_op, feed_dict={ x: train_data[0], y: train_data[1]})
            for _ in range(N_BATCHES):
                sess.run(elements)

            #Load the placeholders with test data and test label
            #RE-INITIALIZE the iter to ***now iterate over test data***
            sess.run(test_init_op, feed_dict={ x: test_data[0], y: test_data[1]})
            for _ in range(N_BATCHES):
                sess.run(elements)

@how_much            
def feedable(**kwargs):
    print('\nfeedable:')
    # create two datasets, one for training and one for test
    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    
    # create the iterators from the dataset
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()

    handle = tf.placeholder(tf.string, shape=[])
    
    #See https://www.tensorflow.org/api_docs/python/tf/data/Iterator#from_string_handle
    iter = tf.data.Iterator.from_string_handle(
                            handle, \
                            train_dataset.output_types, \
                            train_dataset.output_shapes)
    elements = iter.get_next()
    
    make_train_string_handle = train_iterator.string_handle()
    make_test_string_handle = test_iterator.string_handle()
    
    init_train_iterator = train_iterator.initializer
    init_test_iterator = test_iterator.initializer

    with tf.Session() as sess:
        #See https://www.tensorflow.org/api_docs/python/tf/data/Iterator#string_handle
        train_string_handle = sess.run(make_train_string_handle)
        test_string_handle = sess.run(make_test_string_handle)

        #See https://www.tensorflow.org/api_docs/python/tf/data/Iterator#initializer
        sess.run(init_train_iterator, feed_dict={ x: train_data[0], y: train_data[1]})
        sess.run(init_test_iterator, feed_dict={ x: test_data[0], y: test_data[1]})

        for i in range(EPOCHS):
            print(i, end="")
            for _ in range(N_BATCHES):
                sess.run(elements, feed_dict={handle: train_string_handle})
            for _ in range(N_BATCHES):
                sess.run(elements, feed_dict={handle: test_string_handle})

print("")
one_shot(log_time=log_time)
print("")
initialisable(log_time=log_time)
print("")
reinitializable(log_time=log_time)
print("")
feedable(log_time=log_time)

sorted((value,key) for (key,value) in log_time.items())