In [2]:
import tensorflow as tf
import numpy as np

  from ._conv import register_converters as _register_converters


In [2]:
x = np.random.sample((100,2))
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    print(sess.run(el))

[0.50035296 0.92651365]


In [3]:
# using two numpy arrays
features, labels = (np.random.sample((100,2)), np.random.sample((100,1)))
dataset = tf.data.Dataset.from_tensor_slices((features,labels))

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    print(sess.run(el))

(array([0.33327842, 0.90874317]), array([0.02171065]))


In [4]:
# using a tensor
dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform([100, 2]))

iter = dataset.make_initializable_iterator()
el = iter.get_next()

with tf.Session() as sess:
    sess.run(iter.initializer)
    print(sess.run(el))

[0.00786543 0.26009214]


In [5]:
# using a placeholder
x = tf.placeholder(tf.float32, shape=[None,2])
dataset = tf.data.Dataset.from_tensor_slices(x)

data = np.random.sample((100,2))

iter = dataset.make_initializable_iterator()
el = iter.get_next()

with tf.Session() as sess:
    sess.run(iter.initializer, feed_dict={ x: data })
    print(sess.run(el))

[0.03433903 0.7280311 ]


In [6]:
# from generator
sequence = np.array([[[1]],[[2],[3]],[[3],[4],[5]]])

def generator():
    for el in sequence:
        yield el

dataset = tf.data.Dataset().batch(1).from_generator(generator,
                                           output_types= tf.int64, 
                                           output_shapes=(tf.TensorShape([None, 1])))

iter = dataset.make_initializable_iterator()
el = iter.get_next()

with tf.Session() as sess:
    sess.run(iter.initializer)
    print(sess.run(el))
    print(sess.run(el))
    print(sess.run(el))


[[1]]
[[2]
 [3]]
[[3]
 [4]
 [5]]


In [7]:
# initializable iterator to switch between data
EPOCHS = 10

x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
dataset = tf.data.Dataset.from_tensor_slices((x, y))

train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.array([[1,2]]), np.array([[0]]))

iter = dataset.make_initializable_iterator()
features, labels = iter.get_next()

with tf.Session() as sess:
#     initialise iterator with train data
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})
    for _ in range(EPOCHS):
        sess.run([features, labels])
#     switch to test data
    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})
    print(sess.run([features, labels]))

    
    

[array([1., 2.], dtype=float32), array([0.], dtype=float32)]


In [8]:
# Reinitializable iterator to switch between Datasets
EPOCHS = 10
# making fake data using numpy
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.random.sample((10,2)), np.random.sample((10,1)))
# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
test_dataset = tf.data.Dataset.from_tensor_slices(test_data)
# create a iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                           train_dataset.output_shapes)
features, labels = iter.get_next()
# create the initialisation operations
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)

with tf.Session() as sess:
    sess.run(train_init_op) # switch to train dataset
    for _ in range(EPOCHS):
        sess.run([features, labels])
    sess.run(test_init_op) # switch to val dataset
    print(sess.run([features, labels]))

    
    

[array([0.94182994, 0.26802265]), array([0.81551463])]


In [9]:
# feedable iterator to switch between iterators
EPOCHS = 10
# making fake data using numpy
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.random.sample((10,2)), np.random.sample((10,1)))
# create placeholder
x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices((x,y))
test_dataset = tf.data.Dataset.from_tensor_slices((x,y))
# create the iterators from the dataset
train_iterator = train_dataset.make_initializable_iterator()
test_iterator = test_dataset.make_initializable_iterator()
# same as in the doc https://www.tensorflow.org/programmers_guide/datasets#creating_an_iterator
handle = tf.placeholder(tf.string, shape=[])
iter = tf.data.Iterator.from_string_handle(
    handle, train_dataset.output_types, train_dataset.output_shapes)
next_elements = iter.get_next()

with tf.Session() as sess:
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    
    # initialise iterators. In our case we could have used the 'one-shot' iterator instead,
    # and directly feed the data insted the Dataset.from_tensor_slices function, but this
    # approach is more general
    sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})
    sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})
    
    for _ in range(EPOCHS):
        x,y = sess.run(next_elements, feed_dict = {handle: train_handle})
        print(x, y)
        
    x,y = sess.run(next_elements, feed_dict = {handle: test_handle})
    print(x,y)

[0.8552025  0.13344285] [0.24534453]
[0.23880187 0.2294315 ] [0.77315474]
[0.763904 0.439595] [0.42727667]
[0.6563372 0.1366187] [0.02278621]
[0.71135175 0.394754  ] [0.8552778]
[0.7329701  0.42924434] [0.43608633]
[0.8240853 0.7750715] [0.5140434]
[0.65556693 0.67978406] [0.8228361]
[0.02365288 0.18461536] [0.85140544]
[0.48037764 0.7320316 ] [0.773141]
[0.6671238 0.8491173] [0.45188755]


In [10]:
# BATCHING
BATCH_SIZE = 4
x = np.random.sample((100,2))
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x).batch(BATCH_SIZE)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    print(sess.run(el))

[[0.70861276 0.91522017]
 [0.993154   0.74425373]
 [0.42730845 0.03037355]
 [0.54031161 0.57429001]]


In [4]:
# REPEAT
BATCH_SIZE = 4
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
dataset = dataset.repeat()

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    for _ in range(8):
        print(sess.run(el))

[1]
[2]
[3]
[4]
[1]
[2]
[3]
[4]


In [None]:
# MAP
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
dataset = dataset.map(lambda x: x*2)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
#     this will run forever
        for _ in range(len(x)):
            print(sess.run(el))

In [12]:
# SHUFFLE
BATCH_SIZE = 4
x = np.array([[1],[2],[3],[4]])
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
dataset = dataset.shuffle(buffer_size=100)
dataset = dataset.batch(BATCH_SIZE)

iter = dataset.make_one_shot_iterator()
el = iter.get_next()

with tf.Session() as sess:
    print(sess.run(el))

[[3]
 [1]
 [2]
 [4]]


In [13]:
# how to pass the value to a model
EPOCHS = 10
BATCH_SIZE = 16
# using two numpy arrays
features, labels = (np.array([np.random.sample((100,2))]), 
                    np.array([np.random.sample((100,1))]))

dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)

iter = dataset.make_one_shot_iterator()
x, y = iter.get_next()

# make a simple model
net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)

loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label
train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(EPOCHS):
        _, loss_value = sess.run([train_op, loss])
        print("Iter: {}, Loss: {:.4f}".format(i, loss_value))

Iter: 0, Loss: 0.1913
Iter: 1, Loss: 0.1814
Iter: 2, Loss: 0.1720
Iter: 3, Loss: 0.1631
Iter: 4, Loss: 0.1547
Iter: 5, Loss: 0.1469
Iter: 6, Loss: 0.1397
Iter: 7, Loss: 0.1329
Iter: 8, Loss: 0.1267
Iter: 9, Loss: 0.1210


In [18]:
# Wrapping all together -> Switch between train and test set using Initializable iterator
EPOCHS = 10
# create a placeholder to dynamically switch between batch sizes
batch_size = tf.placeholder(tf.int64)
BATCH_SIZE = 32

x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).repeat()

# using two numpy arrays
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.random.sample((20,2)), np.random.sample((20,1)))

iter = dataset.make_initializable_iterator()
features, labels = iter.get_next()
# make a simple model
net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)

loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label
train_op = tf.train.AdamOptimizer().minimize(loss)

n_batches = train_data[0].shape[0] // BATCH_SIZE

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # initialise iterator with train data
    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    print('Training...')
    for i in range(EPOCHS):
        tot_loss = 0
        for _ in range(n_batches):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Iter: {}, Loss: {:.4f}".format(i, tot_loss / n_batches))
    # initialise iterator with test data
    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})
    print('Test Loss: {:4f}'.format(sess.run(loss)))


3
Training...
Iter: 0, Loss: 1.4389
Iter: 1, Loss: 1.4704
Iter: 2, Loss: 1.4081
Iter: 3, Loss: 1.2877
Iter: 4, Loss: 1.1842
Iter: 5, Loss: 1.1944
Iter: 6, Loss: 1.1166
Iter: 7, Loss: 0.9924
Iter: 8, Loss: 0.8997
Iter: 9, Loss: 0.8817
Test Loss: 0.836423


In [10]:
# Wrapping all together -> Switch between train and test set using Reinitializable iterator
EPOCHS = 10
# create a placeholder to dynamically switch between batch sizes
batch_size = tf.placeholder(tf.int64)

x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])
train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size).repeat()
test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size) # always batch even if you want to one shot it
# using two numpy arrays
train_data = (np.random.sample((100,2)), np.random.sample((100,1)))
test_data = (np.random.sample((20,2)), np.random.sample((20,1)))

# create a iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                           train_dataset.output_shapes)
features, labels = iter.get_next()
# create the initialisation operations
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)

# make a simple model
net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)

loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label
train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # initialise iterator with train data
    sess.run(train_init_op, feed_dict = {x : train_data[0], y: train_data[1], batch_size: 16})
    print('Training...')
    for i in range(EPOCHS):
        tot_loss = 0
        for _ in range(n_batches):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Iter: {}, Loss: {:.4f}".format(i, tot_loss / n_batches))
    # initialise iterator with test data
    sess.run(test_init_op, feed_dict = {x : test_data[0], y: test_data[1], batch_size:len(test_data[0])})
    print('Test Loss: {:4f}'.format(sess.run(loss)))


Training...
Iter: 0, Loss: 0.1602
Iter: 1, Loss: 0.1191
Iter: 2, Loss: 0.0964
Iter: 3, Loss: 0.0907
Iter: 4, Loss: 0.0738
Iter: 5, Loss: 0.0819
Iter: 6, Loss: 0.0728
Iter: 7, Loss: 0.0881
Iter: 8, Loss: 0.0765
Iter: 9, Loss: 0.0729
Test Loss: 0.091081


In [32]:
# load a csv
CSV_PATH = './tweets.csv'
dataset = tf.contrib.data.make_csv_dataset(CSV_PATH, batch_size=32)
iter = dataset.make_one_shot_iterator()
next = iter.get_next()
print(next) # next is a dict with key=columns names and value=column data
inputs, labels = next['text'], next['sentiment']

with  tf.Session() as sess:
    print(sess.run([inputs,labels]))

{'sentiment': <tf.Tensor 'IteratorGetNext_15:0' shape=(?,) dtype=int32>, 'text': <tf.Tensor 'IteratorGetNext_15:1' shape=(?,) dtype=string>}
[array([b"@MENTION, i agree! i'm trying to finish my thesis..and so far it's not going anywhere",
       b'@MENTION erm, sacre coeur even... james -1',
       b"@MENTION now am depressed and br'3ii is sleepin since i came home",
       b'just finishing what turned out to be a nice day',
       b"it's over. it was great. dollhouse",
       b'just got bck from cross-country practice wooo 3 miles!',
       b'@MENTION you mean a man who cheats on his wife habitually and is a complete hypocrite... they have plenty of those already.',
       b'i feel bad for che ming wang though, he cant seem to get it together... and has an era of 30+',
       b'@MENTION @MENTION ok.. so i tweeted about the rains and it is not raining anymore. atleast i wont have to water the plants tomorrow.',
       b'is gutted about katie & peter love them!',
       b"finally home..

In [2]:
log_time = {}
# copied form https://medium.com/pythonhive/python-decorator-to-measure-the-execution-time-of-methods-fa04cb6bb36d
def how_much(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__)
            kw['log_time'][name] = (te - ts)
            
        return result
    return timed

In [3]:
# benchmark
import time
DATA_SIZE = 5000
DATA_SHAPE = ((32,32),(20,))
BATCH_SIZE = 64 
N_BATCHES = DATA_SIZE // BATCH_SIZE
EPOCHS = 10

test_size = (DATA_SIZE//100)*20 

train_shape = ((DATA_SIZE, *DATA_SHAPE[0]),(DATA_SIZE, *DATA_SHAPE[1]))
test_shape = ((test_size, *DATA_SHAPE[0]),(test_size, *DATA_SHAPE[1]))
print(train_shape, test_shape)
train_data = (np.random.sample(train_shape[0]), np.random.sample(train_shape[1]))
test_data = (np.random.sample(test_shape[0]), np.random.sample(test_shape[1])) 

((5000, 32, 32), (5000, 20)) ((1000, 32, 32), (1000, 20))


In [4]:
# used to keep track of the methodds
log_time = {}

tf.reset_default_graph()
sess = tf.InteractiveSession()

input_shape = [None, *DATA_SHAPE[0]] # [None, 64, 64, 3]
output_shape = [None,*DATA_SHAPE[1]] # [None, 20]
print(input_shape, output_shape)

x, y = tf.placeholder(tf.float32, shape=input_shape), tf.placeholder(tf.float32, shape=output_shape)

@how_much
def one_shot(**kwargs):
    print('one_shot')
    train_dataset = tf.data.Dataset.from_tensor_slices(train_data).batch(BATCH_SIZE).repeat()
    train_el = train_dataset.make_one_shot_iterator().get_next()
    
    test_dataset = tf.data.Dataset.from_tensor_slices(test_data).batch(BATCH_SIZE).repeat()
    test_el = test_dataset.make_one_shot_iterator().get_next()
    for i in range(EPOCHS):
        print(i)
        for _ in range(N_BATCHES):
            sess.run(train_el)
        for _ in range(N_BATCHES):
            sess.run(test_el)
            
@how_much
def initialisable(**kwargs):
    print('initialisable')
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(BATCH_SIZE).repeat()

    iter = dataset.make_initializable_iterator()
    elements = iter.get_next()
    
    for i in range(EPOCHS):
        print(i)
        sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})
        for _ in range(N_BATCHES):
            sess.run(elements)
        sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})
        for _ in range(N_BATCHES):
            sess.run(elements)
@how_much            
def reinitializable(**kwargs):
    print('reinitializable')
    # create two datasets, one for training and one for test
    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    # create a iterator of the correct shape and type
    iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)
    elements = iter.get_next()
    # create the initialisation operations
    train_init_op = iter.make_initializer(train_dataset)
    test_init_op = iter.make_initializer(test_dataset)
    
    for i in range(EPOCHS):
        print(i)
        sess.run(train_init_op, feed_dict={ x: train_data[0], y: train_data[1]})
        for _ in range(N_BATCHES):
            sess.run(elements)
        sess.run(test_init_op, feed_dict={ x: test_data[0], y: test_data[1]})
        for _ in range(N_BATCHES):
            sess.run(elements)
@how_much            
def feedable(**kwargs):
    print('feedable')
    # create two datasets, one for training and one for test
    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()
    # create the iterators from the dataset
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()

    handle = tf.placeholder(tf.string, shape=[])
    iter = tf.data.Iterator.from_string_handle(
        handle, train_dataset.output_types, train_dataset.output_shapes)
    elements = iter.get_next()

    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())

    sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})
    sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})

    for i in range(EPOCHS):
        print(i)
        for _ in range(N_BATCHES):
            sess.run(elements, feed_dict={handle: train_handle})
        for _ in range(N_BATCHES):
            sess.run(elements, feed_dict={handle: test_handle})
            
one_shot(log_time=log_time)
initialisable(log_time=log_time)
reinitializable(log_time=log_time)
feedable(log_time=log_time)

sorted((value,key) for (key,value) in log_time.items())


[None, 32, 32] [None, 20]
one_shot
0
1
2
3
4
5
6
7
8
9
initialisable
0
1
2
3
4
5
6
7
8
9
reinitializable
0
1
2
3
4
5
6
7
8
9
feedable
0
1
2
3
4
5
6
7
8
9


[(1.5659220218658447, 'reinitializable'),
 (1.581655740737915, 'initialisable'),
 (1.7346899509429932, 'feedable'),
 (2.3557801246643066, 'one_shot')]