In [160]:
import tensorflow as tf

import os

## Dataset from array

In [29]:
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34, 31]

In [30]:
# Create a TensorFlow Dataset from a Python list
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

By default each element of `tf.data.Dataset` is a `tensor`. We can convert tensors to numpy arrays using the method `.numpy()` or by iterating over numpy arrays using `as_numpy_iterator()`.

In [31]:
# Iterating over dataset
for sales in tf_dataset:
    print(sales)

tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(22, shape=(), dtype=int32)
tf.Tensor(-108, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(-1, shape=(), dtype=int32)
tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(34, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)


In [32]:
# Iterating using as_numpy_iterator()
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
-108
31
-1
32
34
31


In [33]:
# Take first 3
for sales in tf_dataset.take(3):
    print(sales)

tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(22, shape=(), dtype=int32)
tf.Tensor(-108, shape=(), dtype=int32)


In [34]:
# Filter
tf_dataset = tf_dataset.filter(lambda x: x > 0)

for sales in tf_dataset:
    print(sales.numpy())

21
22
31
32
34
31


In [36]:
# Map
tf_dataset = tf_dataset.map(lambda x: x*2)

for sales in tf_dataset.as_numpy_iterator():
    print(sales)

42
44
62
64
68
62


In [38]:
# Shuffle
tf_dataset = tf_dataset.shuffle(3)

for sales in tf_dataset.as_numpy_iterator():
    print(sales)

42
64
44
62
68
62


In [45]:
# Batch
for sales in tf_dataset.batch(2):
    print(sales)

tf.Tensor([62 44], shape=(2,), dtype=int32)
tf.Tensor([68 62], shape=(2,), dtype=int32)
tf.Tensor([64 42], shape=(2,), dtype=int32)


### Chaining operations

In [123]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

tf_dataset = tf_dataset.filter(lambda x: x > 0).map(lambda y: y*2).shuffle(3).batch(2)

for sales in tf_dataset.as_numpy_iterator():
    print(sales)

[42 64]
[62 44]
[68 62]


# Dataset from images

In [179]:
# Stores image files, doesn't load them into memory yet
images_ds = tf.data.Dataset.list_files('images/*/*', shuffle=False)

for file in images_ds.take(5):
    print(file.numpy())

b'images\\funny\\FB_IMG_1625754072977.jpg'
b'images\\funny\\FB_IMG_1625907625802.jpg'
b'images\\funny\\FB_IMG_1626020773653.jpg'
b'images\\funny\\FB_IMG_1626766662431.jpg'
b'images\\funny\\Screenshot_20210907_170655.jpg'


In [180]:
images_ds = images_ds.shuffle(100)

for file in images_ds.take(5):
    print(file.numpy())

b'images\\funny\\Screenshot_20211114_130621.jpg'
b'images\\not_funny\\Screenshot_20220227_143451.jpg'
b'images\\funny\\FB_IMG_1626766662431.jpg'
b'images\\funny\\FB_IMG_1626020773653.jpg'
b'images\\funny\\Screenshot_20211103_180629.jpg'


In [181]:
class_names = ['funny', 'not_funny']

In [182]:
image_count = len(images_ds)
image_count

20

In [183]:
train_size = int(image_count * 0.8)

train_ds = images_ds.take(train_size)   # head(train_size)
test_ds = images_ds.skip(train_size)    # tail(image_count - train_size)

In [184]:
print(len(train_ds))
print(len(test_ds))

16
4


In [185]:
get_label = lambda file_path: tf.strings.split(file_path, os.path.sep)[1]

def process_image(file_path):
    label = get_label(file_path)
    
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128, 128])
    
    return img, label

In [186]:
train_ds = train_ds.map(process_image)

for image, label in train_ds.take(1):
    print(label)
    print(image[0][0])

tf.Tensor(b'funny', shape=(), dtype=string)
tf.Tensor([251.18774 251.18774 251.18774], shape=(3,), dtype=float32)


In [187]:
def scale(image, label):
    return image/255, label

In [188]:
train_ds = train_ds.map(scale)

for image, label in train_ds.take(1):
    print(label)
    print(image[0][0])

tf.Tensor(b'funny', shape=(), dtype=string)
tf.Tensor([0.06654412 0.06654412 0.06654412], shape=(3,), dtype=float32)
