In [1]:
import tensorflow as tf
import time

In [2]:
# Display the TensorFlow version this notebook is running against.
tf.__version__

'2.9.1'

In [5]:
class FileDataset(tf.data.Dataset):
    """Synthetic dataset that imitates reading samples from a file.

    Calling ``FileDataset(...)`` does not produce a ``FileDataset`` instance:
    ``__new__`` returns the dataset built by
    ``tf.data.Dataset.from_generator``, so the class acts as a factory.
    """

    @staticmethod
    def read_files_in_batches(num_samples):
        """Yield ``num_samples`` one-element tuples with simulated I/O latency.

        Declared as a ``staticmethod`` because it takes neither ``self`` nor
        ``cls``; without the decorator, calling it through an instance would
        bind that instance to ``num_samples``.

        Args:
            num_samples: Number of samples to generate.

        Yields:
            ``(sample_idx,)`` for each ``sample_idx`` in ``range(num_samples)``.
        """
        # Simulated cost of opening the file: paid once per generator run.
        time.sleep(0.03)
        for sample_idx in range(num_samples):
            # Simulated cost of reading a single record.
            time.sleep(0.015)
            yield (sample_idx,)

    def __new__(cls, num_samples=3):
        """Build the generator-backed dataset.

        Args:
            num_samples: Number of samples the generator yields per run.

        Returns:
            The dataset created by ``tf.data.Dataset.from_generator`` with the
            ``output_signature`` declared below (float32, shape ``(1,)``).
        """
        return tf.data.Dataset.from_generator(
            cls.read_files_in_batches,
            # Each yielded 1-tuple is declared as a float32 tensor of shape (1,).
            output_signature=tf.TensorSpec(shape=(1,), dtype=tf.float32),
            # Forwarded to the generator as its `num_samples` argument.
            args=(num_samples,),
        )

In [6]:
def benchmark(dataset, num_epochs=2):
    """Iterate over ``dataset`` ``num_epochs`` times, sleeping 10 ms per element.

    The sleep simulates per-sample work; nothing is returned.

    Args:
        dataset: Any iterable; it is re-iterated from the start every epoch.
        num_epochs: Number of full passes over ``dataset``.
    """
    step_seconds = 0.01  # simulated processing time for one sample
    for _epoch in range(num_epochs):
        for _sample in dataset:
            time.sleep(step_seconds)

In [7]:
%%timeit
# Baseline: iterate the generator-backed dataset with no prefetch applied.
benchmark(FileDataset())

2022-07-26 11:43:38.534636: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-26 11:43:48.213032: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7390 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080, pci bus id: 0000:04:00.0, compute capability: 6.1
2022-07-26 11:43:48.247521: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 7390 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1080, pci bus id: 0000:05:00.0, compute capability: 6.1
2022-07-26 11:43:48.250409: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/de

358 ms ± 2.72 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
# Same benchmark with `prefetch(1)` applied to the dataset.
benchmark(FileDataset().prefetch(1))

334 ms ± 3.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit
# Same benchmark, passing tf.data.AUTOTUNE as the prefetch buffer size.
benchmark(FileDataset().prefetch(tf.data.AUTOTUNE))

335 ms ± 4.22 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
dataset = tf.data.Dataset.range(5)
# Walk the dataset as NumPy values and print each element on its own line.
for value in dataset.as_numpy_iterator():
    print(value)

0
1
2
3
4


In [11]:
# Square every element; the mapped dataset replaces `dataset`.
dataset = dataset.map(lambda x: x * x)
for value in dataset.as_numpy_iterator():
    print(value)

0
1
4
9
16


In [12]:
# Apply `cache()` to the dataset, then materialize its elements for display.
dataset = dataset.cache()
cached_values = list(dataset.as_numpy_iterator())
cached_values

[0, 1, 4, 9, 16]

In [14]:
def mapped_function(s):
    """Return ``s`` unchanged after invoking a simulated 30 ms delay.

    Args:
        s: The element to pass through.

    Returns:
        ``s``, unmodified.
    """
    def _simulated_preprocessing():
        time.sleep(0.03)

    # No inputs and no outputs: the py_function is called only for its delay.
    tf.py_function(func=_simulated_preprocessing, inp=[], Tout=())
    return s

In [15]:
%%timeit -n1 -r1

# Five epochs with `mapped_function` applied to every element and no cache.
benchmark(FileDataset().map(mapped_function), 5)

1.47 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [16]:
%%timeit -n1 -r1

# Same five-epoch run with `.cache()` chained after the map.
benchmark(FileDataset().map(mapped_function).cache(), 5)

462 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
