In [1]:
import tensorflow.keras as keras
from tensorflow.keras import layers
import tensorflow as tf
import tensorflow_datasets as tfds
import time
import os
import re
import numpy as np

In [2]:
from google.colab import drive
drive.mount("/content/gdrive")
drive_path = "gdrive/MyDrive/MachineLearning/HandsOnMachineLearning/chapter15"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
from pathlib import Path


DOWNLOAD_ROOT = "http://download.tensorflow.org/data/"
FILENAME = "quickdraw_tutorial_dataset_v1.tar.gz"
filepath = keras.utils.get_file(FILENAME, DOWNLOAD_ROOT + FILENAME, extract=True)
path = Path(filepath).parent

In [4]:
def get_filepaths(split, path):
    regex = re.compile(split + ".tfrecord-\d{5}-of-\d{5}")
    filepaths = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if re.match(regex, file):
                filepaths.append(str(path / file))
    return filepaths

In [5]:
train_filepaths = get_filepaths("training", path)
eval_filepaths = get_filepaths("eval", path)
print(train_filepaths)

['/root/.keras/datasets/training.tfrecord-00004-of-00010', '/root/.keras/datasets/training.tfrecord-00001-of-00010', '/root/.keras/datasets/training.tfrecord-00008-of-00010', '/root/.keras/datasets/training.tfrecord-00009-of-00010', '/root/.keras/datasets/training.tfrecord-00003-of-00010', '/root/.keras/datasets/training.tfrecord-00000-of-00010', '/root/.keras/datasets/training.tfrecord-00007-of-00010', '/root/.keras/datasets/training.tfrecord-00006-of-00010', '/root/.keras/datasets/training.tfrecord-00005-of-00010', '/root/.keras/datasets/training.tfrecord-00002-of-00010']


In [None]:
def list_record_features(tfrecord_path):
    for rec in tf.data.TFRecordDataset(tfrecord_path).skip(100).take(10):
        example_bytes = rec.numpy()
        example = tf.train.Example()
        example.ParseFromString(example_bytes)
        for key, value in example.features.feature.items():
            kind = value.WhichOneof("kind")
            size = len(getattr(value, kind).value)
            print(f"key:  {key}", f"kind: {kind}", f"size: {size}",
                  sep="\n", end="\n\n")


list_record_features(train_filepaths[3])

In [6]:
feature_description = {
    "class_index": tf.io.FixedLenFeature([], tf.int64),
    "shape": tf.io.FixedLenFeature([2], tf.int64),
    "ink": tf.io.VarLenFeature(tf.float32)
}


def _parse_function(proto):
    example = tf.io.parse_single_example(proto, feature_description)
    class_index = example["class_index"]
    ink = example["ink"]
    ink = tf.sparse.to_dense(ink)
    ink = tf.reshape(ink, (-1, 1))
    return ink, class_index


def image_generator(tfrecord_filepaths):
    raw_dataset = tf.data.TFRecordDataset(tfrecord_filepaths)
    dataset = raw_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    for batch in dataset.padded_batch(batch_size).prefetch(1):
        yield batch


batch_size = 2048
output_signature = (
    tf.TensorSpec(shape=(None, None, 1), dtype=tf.float32),
    tf.TensorSpec(shape=(None,), dtype=tf.int64)
)
train_set = tf.data.Dataset.from_generator(
    lambda: image_generator(train_filepaths),
    output_signature=output_signature
)
valid_set = tf.data.Dataset.from_generator(
    lambda: image_generator(eval_filepaths),
    output_signature=output_signature
)

In [7]:
with open(path / "eval.tfrecord.classes") as test_classes_file:
    test_classes = test_classes_file.readlines()

with open(path / "training.tfrecord.classes") as train_classes_file:
    train_classes = train_classes_file.readlines()

assert train_classes == test_classes
class_names = [name.strip().lower() for name in train_classes]

In [8]:
filepath = time.strftime(f"{drive_path}/models/checkpoint_%Y_%m_%d-%H_%M_%S")
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch',
    initial_value_threshold=None
)


model = keras.models.Sequential([
    keras.layers.Conv1D(32, kernel_size=5, strides=2, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(64, kernel_size=5, strides=2, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(128, kernel_size=3, strides=2, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.LSTM(128, return_sequences=True),
    keras.layers.LSTM(128),
    keras.layers.Dense(len(class_names), activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])

In [9]:
model.fit(train_set, epochs=5, validation_data=valid_set,
          callbacks=[checkpoint_cb])

Epoch 1/5
    181/Unknown - 296s 2s/step - loss: 5.8442 - accuracy: 0.0031

KeyboardInterrupt: 