<a href="https://colab.research.google.com/github/TirendazAcademy/Deep-Learning-with-TensorFlow/blob/main/TensorFlow-Ecosystem/tf-data-Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Working with tf.data.Dataset

In [1]:
import os

import tensorflow as tf
# tf.range(5) holds the int32 values 0..4; from_tensor_slices turns each value
# into one scalar element of the dataset.
x = tf.range(5)
dataset = tf.data.Dataset.from_tensor_slices(x)

In [2]:
dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [3]:
# A dataset is iterable; each element is a tensor, and .numpy() unwraps its value.
for item in dataset:
  print(item.numpy())

0
1
2
3
4


In [4]:
list(dataset.as_numpy_iterator())

[np.int32(0), np.int32(1), np.int32(2), np.int32(3), np.int32(4)]

In [5]:
dataset.element_spec

TensorSpec(shape=(), dtype=tf.int32, name=None)

In [6]:
# Slicing a [3, 5] tensor along its first axis produces elements of shape (5,).
dataset1= tf.data.Dataset.from_tensor_slices(
    tf.random.uniform([3,5])
)

In [7]:
dataset1.element_spec

TensorSpec(shape=(5,), dtype=tf.float32, name=None)

In [8]:
# A plain Python list works too: every number becomes one dataset element.
data = [5,1,-3,-2,-4,7,-10]
tf_dataset = tf.data.Dataset.from_tensor_slices(data)

In [9]:
# Elements come back in the same order as the source list.
for item in tf_dataset:
  print(item.numpy())

5
1
-3
-2
-4
7
-10


In [10]:
# take(3) keeps only the first three elements of the dataset.
first_three = tf_dataset.take(3)
for item in first_three:
  print(item.numpy())

5
1
-3


In [11]:
# filter() keeps the elements for which the predicate is true (here: positives).
positives = tf_dataset.filter(lambda value: value > 0)
for item in positives:
  print(item.numpy())

5
1
7


In [12]:
# map() applies the function to every element (here: doubling each value).
doubled = tf_dataset.map(lambda value: value * 2)
for item in doubled:
  print(item.numpy())

10
2
-6
-4
-8
14
-20


In [13]:
# shuffle() draws elements at random from a buffer of the given size.
shuffled = tf_dataset.shuffle(buffer_size=3)
for item in shuffled:
  print(item.numpy())

-3
-2
1
5
-4
-10
7


In [14]:
# batch(2) groups consecutive elements in pairs; the final batch may be smaller.
batched = tf_dataset.batch(2)
for item in batched:
  print(item.numpy())

[5 1]
[-3 -2]
[-4  7]
[-10]


In [15]:
# Chain the transformations into one pipeline:
# keep positives -> double them -> shuffle -> group into batches of two.
dataset = tf.data.Dataset.from_tensor_slices(data)
dataset = (
    dataset
    .filter(lambda value: value > 0)
    .map(lambda value: value * 2)
    .shuffle(3)
    .batch(2)
)
for batch in dataset:
  print(batch.numpy())

[14 10]
[2]


# How to use tf.data with image datasets

In [16]:
# Download the flower photos archive and unpack it (extract=True).
# get_file returns a local path string, which is inspected in the next cell.
flowers_root = tf.keras.utils.get_file(
    origin="https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz",
    fname="flower_photos",
    extract=True
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
228813984/228813984 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step


In [17]:
flowers_root

'/root/.keras/datasets/flower_photos'

In [18]:
import pathlib
# Wrap the path string in a pathlib.Path so it can be joined with the "/" operator.
flowers_root = pathlib.Path(flowers_root)
flowers_root

PosixPath('/root/.keras/datasets/flower_photos')

In [19]:
# Collect every file three directory levels below flowers_root; in this run the
# matches look like flower_photos/<class name>/<image>.jpg.
# NOTE: list_files shuffles the file names by default (shuffle=True).
ds = tf.data.Dataset.list_files(str(flowers_root/"*/*/*"))

In [20]:
# Number of matched files; used further down to size the train/test split.
image_count = len(ds)
image_count

3670

In [21]:
for file in ds.take(3):
  print(file.numpy())

b'/root/.keras/datasets/flower_photos/flower_photos/tulips/13910471347_30c8bf4de1_n.jpg'
b'/root/.keras/datasets/flower_photos/flower_photos/roses/6732261031_861a1026fa_n.jpg'
b'/root/.keras/datasets/flower_photos/flower_photos/dandelion/4571681134_b605a61547_n.jpg'


In [22]:
# Shuffle the file list exactly once so that the order stays fixed afterwards.
# By default both list_files (shuffle=True) and shuffle()
# (reshuffle_each_iteration=True) produce a new order on every iteration, which
# would let files move between the take()/skip() train and test splits.
# The listing is therefore rebuilt in a deterministic order and shuffled a single
# time with a buffer covering the whole dataset (a small buffer over a sorted
# listing would leave the classes poorly mixed). Rebuilding from the glob also
# makes this cell safe to re-run.
ds = tf.data.Dataset.list_files(str(flowers_root/"*/*/*"), shuffle=False)
ds = ds.shuffle(image_count, reshuffle_each_iteration=False)
for file in ds.take(3):
  print(file.numpy())

b'/root/.keras/datasets/flower_photos/flower_photos/roses/15060816740_68e1b2c31b.jpg'
b'/root/.keras/datasets/flower_photos/flower_photos/sunflowers/4847062576_bae870479c_n.jpg'
b'/root/.keras/datasets/flower_photos/flower_photos/daisy/4792826628_aa5e5a9804_n.jpg'


In [23]:
# 80/20 split: the first train_size elements go to training, the rest to testing.
# NOTE(review): take()/skip() only give disjoint sets if ds yields its elements in
# the same order on every iteration — confirm the upstream shuffling does not
# reshuffle per iteration.
train_size = int(image_count*0.8)
train_ds = ds.take(train_size)
test_ds = ds.skip(train_size)

In [24]:
len(train_ds)

2936

In [25]:
len(test_ds)

734

In [26]:
def get_label(file_path):
  """Return the class label of an image, i.e. the name of its parent directory.

  Args:
    file_path: Path of the image as a string or scalar string tensor,
      e.g. ``.../tulips/3485767306_6db7bdf536.jpg``.

  Returns:
    A scalar string tensor holding the second-to-last path component.
  """
  # Split on the platform path separator; the directory name sits right before
  # the file name, hence index -2.
  parts= tf.strings.split(file_path, os.sep)
  return parts[-2]

In [27]:
# Quick check of get_label on a hand-written path (bytes literal).
x = b'/root/.keras/datasets/flower_photos/flower_photos/tulips/3485767306_6db7bdf536.jpg'
get_label(x).numpy()

b'tulips'

In [28]:
def process_image(file_path):
  """Load one image file and return an ``(image, label)`` pair.

  Args:
    file_path: Path of a JPEG file whose parent directory names its class.

  Returns:
    A tuple ``(img, label)``. ``img`` is a float32 tensor of shape
    ``[128, 128, 3]`` with values scaled to ``[0, 1]``; ``label`` is the string
    tensor returned by ``get_label``.
  """
  label = get_label(file_path)
  img = tf.io.read_file(file_path)
  # Force three channels. With the default (channels=0) the channel count is taken
  # from the file, so a grayscale JPEG would give a differently shaped element and
  # the channel dimension stays unknown when this runs inside Dataset.map.
  img = tf.image.decode_jpeg(img, channels=3)
  img = tf.image.resize(img, [128,128])  # resize returns float32
  img = img / 255  # scale pixel values from [0, 255] to [0, 1]
  return img, label

In [29]:
img, label = process_image(x)

In [30]:
img.shape

TensorShape([128, 128, 3])

In [31]:
img[:3, :3, :1]

<tf.Tensor: shape=(3, 3, 1), dtype=float32, numpy=
array([[[0.10786276],
        [0.19831184],
        [0.17991321]],

       [[0.24564137],
        [0.1655405 ],
        [0.13321868]],

       [[0.2719073 ],
        [0.14083634],
        [0.10665163]]], dtype=float32)>

In [32]:
# Turn file paths into (image, label) pairs. Reading and decoding files is the slow
# part of the pipeline, so let tf.data run it in parallel; AUTOTUNE picks the level
# of parallelism and the element order is still preserved by default.
train_ds = train_ds.map(process_image, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(process_image, num_parallel_calls=tf.data.AUTOTUNE)

In [33]:
# Pull a single (image, label) pair out of the mapped dataset as a sanity check.
for image, label in train_ds.take(1):
  print(label.numpy())
  print(image.numpy())

b'roses'
[[[0.9876891  0.45899227 0.7169527 ]
  [0.9890618  0.5125756  0.7082402 ]
  [0.9820564  0.42001164 0.6338463 ]
  ...
  [0.98809195 0.4087151  0.6574025 ]
  [0.98471224 0.40363052 0.6636666 ]
  [0.9843137  0.39499584 0.6651142 ]]

 [[0.986764   0.44750282 0.72201216]
  [0.9854947  0.51070774 0.73992103]
  [0.9845294  0.44806984 0.62251025]
  ...
  [0.9884651  0.39961943 0.6495639 ]
  [0.9932744  0.4068029  0.6552806 ]
  [0.9848341  0.38661128 0.6671873 ]]

 [[0.9826495  0.4311679  0.704682  ]
  [0.9779883  0.5093726  0.74296755]
  [0.98918504 0.46623775 0.6616144 ]
  ...
  [0.98828125 0.3999878  0.65030926]
  [0.9818639  0.3961409  0.6430018 ]
  [0.97683394 0.38392094 0.624302  ]]

 ...

 [[0.81709987 0.26288298 0.5077426 ]
  [0.8181397  0.27709746 0.51483893]
  [0.82637554 0.26352012 0.48384818]
  ...
  [0.6795405  0.09111831 0.18587336]
  [0.6863499  0.11631314 0.26920477]
  [0.6757396  0.15161946 0.31989577]]

 [[0.8218288  0.25837928 0.5115797 ]
  [0.82234913 0.26614583 0.5

## Resources
- [CodeBasics: TensorFlow Input Pipeline](https://youtu.be/VFEOskzhhbc)
- [Build TensorFlow input pipelines](https://www.tensorflow.org/guide/data#consuming_text_data)
- [Flowers dataset](https://www.kaggle.com/datasets/alxmamaev/flowers-recognition)

🚀 Let's connect: [YouTube](https://www.youtube.com/c/TirendazAcademy) | [Medium](https://tirendazacademy.medium.com) | [Twitter](https://twitter.com/evrenozkip) | [Instagram](https://www.instagram.com/tirendazacademy) | [GitHub](https://github.com/TirendazAcademy) | [LinkedIn](https://www.linkedin.com/in/tirendaz-academy)