In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import time

In [2]:
(train_data, test_data), ds_info = tfds.load('imdb_reviews', split=['train', 'test'], as_supervised=True, with_info=True)

In [5]:
def preprocess(text, label):
    text = tf.strings.substr(text, 0, 300)
    text = tf.strings.regex_replace(text, rb"<br\s*/?>", b" ")
    return text, label

In [7]:
encoder = tf.keras.layers.TextVectorization(max_tokens=10000)
encoder.adapt(train_data.map(lambda text, label: text))

In [23]:
def encode_map(text, label):
    return encoder(text), label
train_ds_unopt = train_data.map(preprocess).map(encode_map).padded_batch(32, padded_shapes=([300], []))
train_ds_opt = train_data.map(preprocess).map(encode_map).cache().padded_batch(32, padded_shapes=([300], [])).prefetch(tf.data.AUTOTUNE)

In [25]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10000, 16),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), metrics=['accuracy'])

In [27]:
start = time.time()
model.fit(train_ds_unopt, epochs=1)
unopt_time = time.time() - start

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 25ms/step - accuracy: 0.4994 - loss: 0.6934


In [29]:
start = time.time()
model.fit(train_ds_opt, epochs=1)
opt_time = time.time() - start

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 26ms/step - accuracy: 0.5040 - loss: 0.6824


In [31]:
print(f"Unoptimized Time: {unopt_time:.2f}s")
print(f"Optimized Time: {opt_time:.2f}s")

Unoptimized Time: 21.32s
Optimized Time: 20.22s
