### Import libraries

In [1]:
import numpy as np
import tensorflow as tf
import os

print("NumPy version:", np.__version__)
print("TensorFlow version:", tf.__version__)

NumPy version: 1.23.5
TensorFlow version: 2.6.0


### Helper functions

In [2]:
def process_image(file_path_tensor):
    parts = tf.strings.split(file_path_tensor, os.sep)
    label = parts[-2]

    image = tf.io.read_file(file_path_tensor)
    image = tf.image.decode_jpeg(image)
    image = tf.image.resize(image, [128, 128])
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = image / 255

    return image, label


def check_shape(x, y):
    print(x.shape)
    d1, d2, d3 = x.shape
    return d3 == 3

### Load data

In [3]:
images_ds = tf.data.Dataset.list_files("./images/*/*", shuffle=True)

file_path = next(iter(images_ds))
image, label = process_image(file_path)

print("Shape:", image.shape)
print("Class label:", label.numpy().decode())

Shape: (128, 128, 3)
Class label: dog


### Technique 1

In [4]:
# ETL pipeline.
X_y_tensors = (
    images_ds
    .map(process_image)   # Extra and Transform
    .filter(check_shape)  # Filter
    .as_numpy_iterator()  # Load
)

print("Final X count:", len(list(X_y_tensors)))

(128, 128, None)
Final X count: 0


### Technique 2

In [5]:
X_y_tensors = images_ds.map(process_image)

count = 0
for x, y in X_y_tensors:
    d1, d2, d3 = x.shape
    if d3 > 3:
        continue
    count += 1

print("Final X count:", count)

Final X count: 123
