In [None]:
import tensorflow as tf
from matplotlib import pyplot as plt

%matplotlib inline

In [None]:
# Ask TensorFlow for the name of a GPU device visible to this runtime
# (the cell output shows whatever string TensorFlow reports).
tf.test.gpu_device_name()

# Datasets

## Basics

In [None]:
import numpy as np

# Toy example: a random 3x6 array turned into a tf.data.Dataset.
# from_tensor_slices slices along the first axis, so the rows become the dataset elements.
data = np.random.rand(3, 6)
dataset = tf.data.Dataset.from_tensor_slices(data)
dataset

In [None]:
# Walk through every element of the dataset; each element is a tensor,
# converted to a NumPy array for printing.
for i, row in enumerate(dataset):
  print(f"row {i}: {row.numpy()}")

In [None]:
# take(2) yields a new dataset limited to the first two elements.
for i, row in enumerate(dataset.take(2)):
  print(f"row {i}: {row.numpy()}")

In [None]:
# Inspect the type specification (shape and dtype) of a single dataset element.
dataset.element_spec

## From memory

In [None]:
# Load Fashion-MNIST through Keras; the result unpacks into a training split and a test split.
train, test = tf.keras.datasets.fashion_mnist.load_data()

Training and test sets are tuples where the first tuple element contains feature images and the second contains corresponding labels.

In [None]:
# Separate each split into its image array and its label array.
train_img, train_lbl = train
test_img, test_lbl = test

In [None]:
# Scale pixel intensities to the [0, 1] range (assumes 8-bit pixels in [0, 255] — TODO confirm).
# Cast to float32 first: dividing an integer array by a Python float would otherwise
# produce float64, doubling the memory footprint of data that Keras layers process
# in float32 by default anyway.
train_image = train_img.astype("float32") / 255.0
test_image = test_img.astype("float32") / 255.0

In [None]:
# Sanity check: the number of training images should match the number of training labels.
train_image.shape, train_lbl.shape

In [None]:
# Same sanity check for the test split.
test_image.shape, test_lbl.shape

In [None]:
# Display one randomly chosen training image.
# The upper bound follows the actual number of training images instead of a
# hard-coded 60000, so the cell keeps working if the data is subsampled.
sample_idx = np.random.randint(0, len(train_image))
plt.imshow(train_image[sample_idx], cmap="ocean", interpolation='nearest')
plt.axis("off")
# Turn the grid off explicitly; grid(visible=None) without other kwargs *toggles*
# the grid state rather than disabling it.
plt.grid(False)
plt.show()

In [None]:
# Build (image, label) datasets from the in-memory arrays; passing a tuple makes
# each dataset element a pair of one image and its label.
train_ds = tf.data.Dataset.from_tensor_slices((train_image, train_lbl))
test_ds = tf.data.Dataset.from_tensor_slices((test_image, test_lbl))
train_ds

In [None]:
# Pull the first (image, label) example out of the training dataset as NumPy values
# and show the shape of each part.
img, label = next(train_ds.take(1).as_numpy_iterator())
img.shape, label.shape

The `batch(n)` method combines `n` consecutive dataset elements into a single batch, adding a leading batch dimension to each component.

In [None]:
# Group the training examples into batches of 5, fetch only the first batch as
# NumPy arrays, and show the shape of the image and label parts.
img_batch, label_batch = next(
    train_ds.batch(5).take(1).as_numpy_iterator()
)
img_batch.shape, label_batch.shape

In [None]:
# Human-readable class names; the list position is presumably the integer label
# used in the dataset — verify against the dataset documentation.
class_labels = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

## Configuration

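- **Prefetching**: overlaps data preprocessing and model execution while training
- **Caching**: keeps data in memory after it is loaded off disk during the first epoch
- **Shuffling**: randomizes the order of examples using a fixed-size buffer, so each epoch sees the data in a different order
- **Batching**: groups consecutive examples into fixed-size batches that are fed to the model together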


In [None]:
SHUFFLE_SIZE = 5000  # shuffle buffer size (smaller than the dataset, so shuffling is approximate)
BATCH_SIZE = 64

# Training pipeline: cache -> shuffle -> batch -> prefetch.
# cache() has to come BEFORE shuffle(): a cache replays exactly the same elements
# on every iteration, so caching the already shuffled and batched dataset would
# freeze the first epoch's batches and every later epoch would see the same order.
train_ds_batched = (
    train_ds
    .cache()
    .shuffle(SHUFFLE_SIZE)  # reshuffles on every epoch by default
    .batch(BATCH_SIZE)
)
train_ds_fmt = (
    train_ds_batched
    .prefetch(tf.data.AUTOTUNE)  # works at the batch level; buffer size tuned at runtime
)

# Evaluation pipeline: no shuffling, so caching the batched dataset is safe.
test_ds_batched = (
    test_ds
    .batch(BATCH_SIZE)
)
test_ds_fmt = (
    test_ds_batched
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Fetch the first batch of the formatted training pipeline as NumPy arrays.
imgs, lbls = next(train_ds_fmt.take(1).as_numpy_iterator())

In [None]:
# The leading dimension of the fetched batch is the batch dimension.
imgs.shape

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.losses import SparseCategoricalCrossentropy

In [None]:
# Reset Keras global state, then seed NumPy and TensorFlow so the run starts
# from fixed random seeds.
# NOTE(review): the shuffled training pipeline is built in an earlier cell, before
# these seeds are set — confirm whether the shuffle order is meant to be reproducible.
tf.keras.backend.clear_session()
np.random.seed(0)
tf.random.set_seed(0)

In [None]:
# Hand-computed sizes to compare against model.summary():
# each dense layer has (inputs * units) weights plus one bias per unit.
flatten_params = 28 ** 2                            # 784 values per flattened 28x28 image
dense_1_params = 128 * flatten_params + 128         # 100480
dense_2_params = 10 * 128 + 10                      # 1290

In [None]:
# Shape of a single image, read from the dataset's element spec
# (component 0 is the image, component 1 the label).
# element_spec is a static property of the dataset, so no take(1) is needed to read it.
img_shape = train_ds.element_spec[0].shape
img_shape

In [None]:
# Small fully connected classifier: flatten the image, one hidden ReLU layer with
# dropout, and a 10-unit output layer. The output layer has no activation, so it
# emits raw scores (logits).
model = Sequential(
    layers=[
        Flatten(input_shape=img_shape),
        Dense(128, activation='relu'),
        Dropout(0.4),
        Dense(10, activation=None),
    ]
)
model.summary()

In [None]:
# Configure training: Adam optimizer, accuracy metric, and a sparse categorical
# cross-entropy loss. from_logits=True tells the loss that the model outputs are
# unnormalized scores rather than probabilities.
model.compile(
    loss=SparseCategoricalCrossentropy(from_logits=True),
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
EPOCHS = 10  # number of full passes over the training pipeline

# Train on the formatted training pipeline and evaluate on the formatted test
# pipeline after each epoch; the returned History object is kept in `history`.
history = model.fit(
    x=train_ds_fmt,
    validation_data=test_ds_fmt,
    epochs=EPOCHS,
    verbose=1,
)