In [1]:
import tensorflow as tf

In [2]:
# Build a three-element dataset and double every element with map().
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]).map(lambda x: x * 2)

# Eager iteration yields one scalar tf.Tensor per element.
for e in dataset:
    print(e)


tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)


# Batching dataset elements

In [30]:
# 100 consecutive integers, grouped into batches of 5 elements.
dataset = tf.data.Dataset.range(100)
batched_dataset = dataset.batch(batch_size=5)

# Only look at the first two batches.
first_batches = batched_dataset.take(count=2)
for batch in first_batches:
    print(batch.numpy())

[0 1 2 3 4]
[5 6 7 8 9]


In [19]:
# Two aligned streams: one counting up from 0, one counting down from 0.
inc_dataset = tf.data.Dataset.range(100)
dec_dataset = tf.data.Dataset.range(0, -100, -1)

# zip() pairs the streams element-wise; batch() then batches each component.
dataset = tf.data.Dataset.zip((inc_dataset, dec_dataset))
batched_dataset = dataset.batch(batch_size=4)

for batch in batched_dataset.take(count=4):
    # Each batch is a tuple of tensors; convert every component to NumPy.
    print(list(map(lambda component: component.numpy(), batch)))


[array([0, 1, 2, 3], dtype=int64), array([ 0, -1, -2, -3], dtype=int64)]
[array([4, 5, 6, 7], dtype=int64), array([-4, -5, -6, -7], dtype=int64)]
[array([ 8,  9, 10, 11], dtype=int64), array([ -8,  -9, -10, -11], dtype=int64)]
[array([12, 13, 14, 15], dtype=int64), array([-12, -13, -14, -15], dtype=int64)]


In [36]:
# Five string elements; five is odd on purpose so that batching by 2 leaves a remainder.
dataset = tf.data.Dataset.from_tensor_slices([
    'Ali',
    'Hassan',
    'Hanieh',
    'Sara',
    'Omid',
])

In [37]:
# Group the names two at a time (the last batch may be smaller).
dataset = dataset.batch(batch_size=2)

In [38]:
# Show every batch as a NumPy array of byte strings.
for e in dataset:
    print(e.numpy())

[b'Ali' b'Hassan']
[b'Hanieh' b'Sara']
[b'Omid']


### with drop_remainder

In [40]:
# Same five names as before, rebuilt so the batching below starts from unbatched data.
dataset = tf.data.Dataset.from_tensor_slices([
    'Ali',
    'Hassan',
    'Hanieh',
    'Sara',
    'Omid',
])

In [41]:
# drop_remainder=True discards the final, incomplete batch.
dataset = dataset.batch(batch_size=2, drop_remainder=True)

In [42]:
# Only full batches are printed now.
for e in dataset:
    print(e.numpy())

[b'Ali' b'Hassan']
[b'Hanieh' b'Sara']


# repeat

In [55]:
# Repeat the five names three times (15 elements), then form full batches of 2.
# Because repeat() comes before batch(), batches can straddle epoch boundaries.
dataset = (
    tf.data.Dataset.from_tensor_slices(['Ali', 'Hassan', 'Hanieh', 'Sara', 'Omid'])
    .repeat(count=3)
    .batch(batch_size=2, drop_remainder=True)
)

for e in dataset:
    print(e.numpy())

[b'Ali' b'Hassan']
[b'Hanieh' b'Sara']
[b'Omid' b'Ali']
[b'Hassan' b'Hanieh']
[b'Sara' b'Omid']
[b'Ali' b'Hassan']
[b'Hanieh' b'Sara']


### shuffle

In [68]:
# Shuffle with a buffer as large as the dataset, then form full batches of 2.
# No seed is set, so the printed order changes from run to run.
dataset = (
    tf.data.Dataset.from_tensor_slices(['Ali', 'Hassan', 'Hanieh', 'Sara', 'Omid'])
    .shuffle(buffer_size=5)
    .batch(batch_size=2, drop_remainder=True)
)

for e in dataset:
    print(e.numpy())

[b'Ali' b'Hanieh']
[b'Hassan' b'Omid']


In [70]:
# shuffle() before repeat(): each of the two passes over the data is shuffled,
# and then the 12 resulting elements are grouped into batches of 2.
dataset = (
    tf.data.Dataset.from_tensor_slices(['a', 'b', 'c', 'd', 'e', 'f'])
    .shuffle(buffer_size=6)
    .repeat(count=2)
    .batch(batch_size=2, drop_remainder=True)
)

for e in dataset:
    print(e.numpy())

[b'f' b'a']
[b'b' b'd']
[b'c' b'e']
[b'a' b'f']
[b'c' b'd']
[b'e' b'b']


In [72]:
# reshuffle_each_iteration=False keeps one shuffled order for every pass,
# so both repetitions below print the same sequence of batches.
letters = tf.data.Dataset.from_tensor_slices(['a', 'b', 'c', 'd', 'e', 'f'])
dataset = (
    letters
    .shuffle(6, reshuffle_each_iteration=False)
    .repeat(count=2)
    .batch(batch_size=2, drop_remainder=True)
)
del letters  # keep the notebook namespace the same as before this cell

for e in dataset:
    print(e.numpy())

[b'c' b'a']
[b'd' b'f']
[b'b' b'e']
[b'c' b'a']
[b'd' b'f']
[b'b' b'e']


# Load image with tf.data!

download data from:

https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
    
or

http://colab.class.vision/flower_photos.tgz

for colab user or local:

In [None]:
# Download and extract the flower photos archive; get_file returns the local path.
FLOWERS_URL = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
flowers_root = tf.keras.utils.get_file(fname='flower_photos', origin=FLOWERS_URL, untar=True)
del FLOWERS_URL  # keep the notebook namespace the same as before this cell


for local (I had already downloaded this dataset on my disk):

In [78]:
# Local copy of the dataset; this replaces the value assigned by the download cell.
# Adjust the path to wherever the extracted flower_photos folder lives.
flowers_root = 'D:/dataset/flower_photos'

In [79]:
# Glob for <flowers_root>/<class_dir>/<file>. The separator before the first
# wildcard is required: without it the pattern becomes ".../flower_photos*/*",
# which would also match any sibling directory whose name merely starts with
# "flower_photos". str() lets flowers_root be either a string or a path object.
list_ds = tf.data.Dataset.list_files(str(flowers_root) + '/*/*')


In [80]:
# Peek at three matched file paths (scalar string tensors).
preview = list_ds.take(count=3)
for e in preview:
    print(e)
del preview  # keep the notebook namespace the same as before this cell

tf.Tensor(b'D:\\dataset\\flower_photos\\tulips\\4550805310_5f81c9ba08_n.jpg', shape=(), dtype=string)
tf.Tensor(b'D:\\dataset\\flower_photos\\daisy\\19865728236_a62f8f445b_n.jpg', shape=(), dtype=string)
tf.Tensor(b'D:\\dataset\\flower_photos\\dandelion\\4574737576_044403a997_n.jpg', shape=(), dtype=string)


In [81]:
def process_path(file_path):
    """Map a file path to ``(raw_file_bytes, label)``.

    Args:
        file_path: scalar string tensor holding the path of one file, laid out
            as ``.../<label>/<file name>``.

    Returns:
        A tuple ``(contents, label)`` where ``contents`` is the undecoded file
        content returned by ``tf.io.read_file`` and ``label`` is the name of
        the file's parent directory.
    """
    # Paths use the OS-native separator (backslashes on Windows, forward
    # slashes elsewhere). Normalize to '/' so the same split works everywhere.
    normalized_path = tf.strings.regex_replace(file_path, r'\\', '/')
    # The second-to-last component is the class directory, e.g. b'daisy'.
    label = tf.strings.split(normalized_path, '/')[-2]
    # Read using the original, un-normalized path.
    return tf.io.read_file(file_path), label

In [82]:
# Turn every file path into a (file contents, label) pair.
labeled_ds = list_ds.map(map_func=process_path)

In [83]:
# Print the labels of three samples. `image` keeps the last sample's contents
# after the loop and is inspected in the next cell.
for image, label in labeled_ds.take(count=3):
    print(label)

tf.Tensor(b'daisy', shape=(), dtype=string)
tf.Tensor(b'daisy', shape=(), dtype=string)
tf.Tensor(b'roses', shape=(), dtype=string)


In [84]:
# `image` is the last element produced by the preceding loop: a scalar string
# tensor holding the file's raw, still-encoded bytes (not a pixel array).
image

<tf.Tensor: shape=(), dtype=string, numpy=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x01\x00\x00\x0cHLino\x02\x10\x00\x00mntrRGB XYZ \x07\xce\x00\x02\x00\t\x00\x06\x001\x00\x00acspMSFT\x00\x00\x00\x00IEC sRGB\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-HP  \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11cprt\x00\x00\x01P\x00\x00\x003desc\x00\x00\x01\x84\x00\x00\x00lwtpt\x00\x00\x01\xf0\x00\x00\x00\x14bkpt\x00\x00\x02\x04\x00\x00\x00\x14rXYZ\x00\x00\x02\x18\x00\x00\x00\x14gXYZ\x00\x00\x02,\x00\x00\x00\x14bXYZ\x00\x00\x02@\x00\x00\x00\x14dmnd\x00\x00\x02T\x00\x00\x00pdmdd\x00\x00\x02\xc4\x00\x00\x00\x88vued\x00\x00\x03L\x00\x00\x00\x86view\x00\x00\x03\xd4\x00\x00\x00$lumi\x00\x00\x03\xf8\x00\x00\x00\x14meas\x00\x00\x04\x0c\x00\x00\x00$tech\

In [85]:
def process_path(file_path):
    """Map a file path to ``(decoded_image, label)``.

    Args:
        file_path: scalar string tensor holding the path of one JPEG file,
            laid out as ``.../<label>/<file name>``.

    Returns:
        A tuple ``(image, label)`` where ``image`` is the tensor produced by
        ``tf.image.decode_jpeg`` and ``label`` is the name of the file's
        parent directory. Images are not resized, so their shapes can differ
        from file to file.
    """
    # Paths use the OS-native separator (backslashes on Windows, forward
    # slashes elsewhere). Normalize to '/' so the same split works everywhere.
    normalized_path = tf.strings.regex_replace(file_path, r'\\', '/')
    # The second-to-last component is the class directory, e.g. b'daisy'.
    label = tf.strings.split(normalized_path, '/')[-2]
    # Read using the original, un-normalized path, then decode the JPEG bytes.
    image = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(image)
    return image, label

In [86]:
# Re-map with the decoding version of process_path and inspect three samples.
labeled_ds = list_ds.map(map_func=process_path)
for image, label in labeled_ds.take(count=3):
    # Every image keeps its own height and width.
    print(image.shape)
    print(label)

(240, 180, 3)
tf.Tensor(b'sunflowers', shape=(), dtype=string)
(213, 320, 3)
tf.Tensor(b'tulips', shape=(), dtype=string)
(217, 240, 3)
tf.Tensor(b'daisy', shape=(), dtype=string)


### can we make a batch from it?

In [87]:
# Rebuild the pipeline from the file listing and try to batch the decoded images.
flowers_root = "D:/dataset/flower_photos"
# The separator before the first wildcard is required; without it the pattern
# is ".../flower_photos*/*" and only matches the dataset folder by accident.
list_ds = tf.data.Dataset.list_files(str(flowers_root) + '/*/*')
labeled_ds = list_ds.map(process_path)
batched_ds = labeled_ds.batch(32)
# NOTE: this loop is expected to raise InvalidArgumentError. batch() stacks
# elements into one tensor, which requires every image to have the same shape,
# and the decoded photos differ in height and width.
for image, label in batched_ds.take(3):
    print(image.shape)


InvalidArgumentError: Cannot add tensor to the batch: number of elements does not match. Shapes are: [tensor]: [310,500,3], [batch]: [240,320,3]