In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
physical_devices = tf.config.list_physical_devices("GPU")
tf.config.experimental.set_memory_growth(physical_devices[0], True)

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

In [5]:
(ds_train, ds_test), ds_info = tfds.load(
  "mnist",
  split=["train", "test"],
  shuffle_files=True,
  as_supervised=True,       # will return tuple (img, label) otherwise dict
  with_info=True,           # able to get info about dataset
)

In [7]:
def normalize_image(image, label):
  return tf.cast(image, tf.float32) / 255.0, label

In [8]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128

ds_train = ds_train.map(normalize_image, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.cache()
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

ds_test = ds_test.map(normalize_image, num_parallel_calls=AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE)
ds_test = ds_test.prefetch(AUTOTUNE)

In [9]:
model = keras.Sequential(
    [
        keras.Input((28, 28, 1)),
        layers.Conv2D(32, 3, activation="relu"),
        layers.Flatten(),
        tf.keras.layers.Dense(10),
    ]
)

model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=5, verbose=True)
model.evaluate(ds_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.06587778776884079, 0.9797000288963318]

In [12]:
# IMDB Dataset

(ds_train, ds_test), ds_info = tfds.load(
    "imdb_reviews",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,  # will return tuple (img, label) otherwise dict
    with_info=True,  # able to get info about dataset
)

In [61]:
tokenizer = tfds.features.text.Tokenizer()


def build_vocabulary():
    vocabulary = set()
    for text, _ in ds_train:
        vocabulary.update(tokenizer.tokenize(text.numpy().lower()))
    return vocabulary

vocabulary = build_vocabulary()

In [63]:
encoder = tfds.features.text.TokenTextEncoder(
  vocabulary, oov_token="<UNK>", lowercase=True, tokenizer=tokenizer
)

def encode(text_tensor, label):
  return encoder.encode(text_tensor.numpy().lower()), label


def encode_map_fn(text, label):
    # py_func doesn't set the shape of the returned tensors.
    encoded_text, label = tf.py_function(
        encode, inp=[text, label], Tout=(tf.int64, tf.int64)
    )

    # `tf.data.Datasets` work best if all components have a shape set
    #  so set the shapes manually:
    encoded_text.set_shape([None])
    label.set_shape([])

    return encoded_text, label


In [64]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
ds_train = ds_train.map(encode_map_fn, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(1000)
ds_train = ds_train.padded_batch(32, padded_shapes=([None], ()))
ds_train = ds_train.prefetch(AUTOTUNE)

ds_test = ds_test.map(encode_map_fn)
ds_test = ds_test.padded_batch(32, padded_shapes=([None], ()))

In [65]:
model = keras.Sequential(
    [
        layers.Masking(mask_value=0),
        layers.Embedding(input_dim=len(vocabulary) + 2, output_dim=32),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation="relu"),
        layers.Dense(1),
    ]
)

In [66]:
model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(3e-4, clipnorm=1),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=15, verbose=2)
model.evaluate(ds_test)

Epoch 1/15
782/782 - 25s - loss: 0.6750 - accuracy: 0.5018
Epoch 2/15
782/782 - 21s - loss: 0.4966 - accuracy: 0.7159
Epoch 3/15
782/782 - 21s - loss: 0.3389 - accuracy: 0.8576
Epoch 4/15
782/782 - 22s - loss: 0.2703 - accuracy: 0.8924
Epoch 5/15
782/782 - 23s - loss: 0.2313 - accuracy: 0.9120
Epoch 6/15
782/782 - 21s - loss: 0.2016 - accuracy: 0.9242
Epoch 7/15
782/782 - 20s - loss: 0.1783 - accuracy: 0.9363
Epoch 8/15
782/782 - 20s - loss: 0.1565 - accuracy: 0.9445
Epoch 9/15
782/782 - 21s - loss: 0.1389 - accuracy: 0.9520
Epoch 10/15
782/782 - 20s - loss: 0.1234 - accuracy: 0.9584
Epoch 11/15
782/782 - 20s - loss: 0.1087 - accuracy: 0.9646
Epoch 12/15
782/782 - 21s - loss: 0.0971 - accuracy: 0.9685
Epoch 13/15
782/782 - 21s - loss: 0.0857 - accuracy: 0.9730
Epoch 14/15
782/782 - 21s - loss: 0.0760 - accuracy: 0.9772
Epoch 15/15
782/782 - 22s - loss: 0.0665 - accuracy: 0.9805


[0.345954567193985, 0.8838800191879272]