In [1]:
# Sanity check: import TensorFlow and print the installed version.
import tensorflow as tf
print(tf.__version__)

2.3.0


In [None]:
# List ../chapter_07/train_base_model/tf_datasets/ with a shell command.
# NOTE(review): this cell has no execution count or recorded output, so it may
# be a leftover from an earlier session -- confirm it is still needed.
!ls ../chapter_07/train_base_model/tf_datasets/

In [5]:
# Long listing, oldest entry first, of the directory that `root_dir` points at
# in the cells below (TFRecord shards for the train/validation/test splits).
!ls -lrt /content/tfrecord-dataset/flowers

total 182640
-rw-r--r-- 1 root root      782 Aug  6 02:23 dataset_info.json
-rw-r--r-- 1 root root  3417952 Aug  6 02:23 image_classification_builder-validation.tfrecord-00000-of-00001
-rw-r--r-- 1 root root  2419057 Aug  6 02:23 image_classification_builder-test.tfrecord-00000-of-00001
-rw-r--r-- 1 root root       49 Aug  6 02:23 image-encoded.image.json
-rw-r--r-- 1 root root 90482995 Aug  6 02:24 image_classification_builder-train.tfrecord-00000-of-00002
-rw-r--r-- 1 root root 90701075 Aug  6 02:24 image_classification_builder-train.tfrecord-00001-of-00002
drwxr-xr-x 1 root root        0 Oct  5 18:14 custom_cnn
drwxr-xr-x 1 root root        0 Oct  5 18:14 full_model
drwxr-xr-x 1 root root        0 Oct  5 18:14 quantized_resnet_vector


In [6]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import os
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import IPython.display as display
from tensorflow import keras
# Sentinel handed to the tf.data calls below (parallel reads, parallel map
# calls, prefetch buffer size) so the runtime picks the values itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Report the TensorFlow and TensorFlow Hub versions, one per line.
print(tf.__version__, hub.__version__, sep='\n')

2.3.0
0.9.0


In [7]:
# Directory holding the TFRecord shards, plus one glob pattern per split.
root_dir = '/content/tfrecord-dataset/flowers'
train_file_pattern = f"{root_dir}/image_classification_builder-train*.tfrecord*"
val_file_pattern = f"{root_dir}/image_classification_builder-validation*.tfrecord*"
test_file_pattern = f"{root_dir}/image_classification_builder-test*.tfrecord*"

In [8]:
# Expand each split's glob pattern and wrap the matching paths in a dataset of
# file names (train, then validation, then test).
train_all_files, val_all_files, test_all_files = (
    tf.data.Dataset.list_files(tf.io.gfile.glob(split_pattern))
    for split_pattern in (train_file_pattern, val_file_pattern, test_file_pattern)
)

In [9]:
# Open the serialized records of every split, reading shard files in parallel.
train_all_ds, val_all_ds, test_all_ds = [
    tf.data.TFRecordDataset(split_files, num_parallel_reads=AUTOTUNE)
    for split_files in (train_all_files, val_all_files, test_all_files)
]

In [10]:
# Count the records of each split by iterating its TFRecord files once.
train_sample_count = sum(1 for _record in tf.data.TFRecordDataset(train_all_files))
val_sample_count = sum(1 for _record in tf.data.TFRecordDataset(val_all_files))
test_sample_count = sum(1 for _record in tf.data.TFRecordDataset(test_all_files))

# The '\n' arguments are separate print() arguments, so the default single-space
# separator surrounds each line break in the output.
print("Sample size for training: {0}".format(train_sample_count),
      '\n', "Sample size for validation: {0}".format(val_sample_count),
      '\n', "Sample size for test: {0}".format(test_sample_count))

Sample size for training: 3540 
 Sample size for validation: 80 
 Sample size for test: 50


In [11]:
def decode_and_resize(serialized_example):
    """Parse one serialized tf.Example into a resized image and a one-hot label.

    Args:
        serialized_example: one serialized tf.Example record carrying the
            `image/*` features declared in the spec below.

    Returns:
        A `(resized_image, label_one_hot)` tuple: the JPEG-decoded, 3-channel
        image resized to [224, 224, 3] with nearest-neighbour sampling, and the
        integer class index one-hot encoded over 5 classes.

    Pixel values are not rescaled here; they keep the decoded [0, 255] range.
    Rescaling to [0, 1] is done separately by `normalize`.
    """
    # Every feature is declared as a scalar FixedLenFeature with no default.
    # The full spec is kept (even for fields not used below) so parsing
    # requirements stay the same as before.
    feature_spec = {
        'image/channels': tf.io.FixedLenFeature([], tf.int64),
        'image/class/label': tf.io.FixedLenFeature([], tf.int64),
        'image/class/text': tf.io.FixedLenFeature([], tf.string),
        'image/colorspace': tf.io.FixedLenFeature([], tf.string),
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/filename': tf.io.FixedLenFeature([], tf.string),
        'image/format': tf.io.FixedLenFeature([], tf.string),
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed_features = tf.io.parse_single_example(
        serialized_example, features=feature_spec)

    image = tf.io.decode_jpeg(parsed_features['image/encoded'], channels=3)
    # The label is stored as int64; cast to int32 before one-hot encoding.
    label = tf.cast(parsed_features['image/class/label'], tf.int32)
    label_one_hot = tf.one_hot(label, depth=5)
    # NOTE(review): 'nearest' presumably keeps the decoded integer dtype, which
    # is why `normalize` casts to float32 afterwards -- confirm.
    resized_image = tf.image.resize(image, [224, 224], method='nearest')
    return resized_image, label_one_hot

def normalize(image, label):
    """Cast `image` to float32 and divide by 255; `label` passes through unchanged.

    Assumes `image` holds pixel values in [0, 255], so the result lies in
    [0, 1.0].
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

In [12]:
# Stage 1: decode each serialized record and resize its image, for every split.
resized_train_ds, resized_val_ds, resized_test_ds = (
    raw_split.map(decode_and_resize, num_parallel_calls=AUTOTUNE)
    for raw_split in (train_all_ds, val_all_ds, test_all_ds)
)

# Stage 2: rescale the resized images to floats, for every split.
resized_normalized_train_ds, resized_normalized_val_ds, resized_normalized_test_ds = (
    resized_split.map(normalize, num_parallel_calls=AUTOTUNE)
    for resized_split in (resized_train_ds, resized_val_ds, resized_test_ds)
)

In [13]:
# Square target image resolution.
pixels = 224
IMAGE_SIZE = (pixels,) * 2
TRAIN_BATCH_SIZE = 32
# The validation and test splits are small, so each one is served as a single
# batch: its batch size is the number of records in the split.
VAL_BATCH_SIZE = sum(1 for _record in tf.data.TFRecordDataset(val_all_files))
TEST_BATCH_SIZE = sum(1 for _record in tf.data.TFRecordDataset(test_all_files))

In [14]:
def prepare_for_model(ds, BATCH_SIZE, cache=True, TRAINING_DATA=True, shuffle_buffer_size=1000):
    """Cache, shuffle, optionally repeat, batch and prefetch a dataset.

    Args:
        ds: dataset exposing `cache`, `shuffle`, `repeat`, `batch`, `prefetch`.
        BATCH_SIZE: number of elements per batch.
        cache: falsy to skip caching. A non-empty string is passed to
            `ds.cache(...)` as the cache file name (for data that does not fit
            in memory). Any other truthy value calls `ds.cache()` with no
            argument.
        TRAINING_DATA: when truthy, the shuffled dataset is repeated without
            a count.
        shuffle_buffer_size: buffer size handed to `ds.shuffle`.

    Returns:
        The dataset after the transformations above, with prefetching enabled
        so batches can be fetched in the background while the model trains.
    """
    if cache:
        # Only a string cache value is forwarded (as the cache file name).
        cache_args = (cache,) if isinstance(cache, str) else ()
        ds = ds.cache(*cache_args)

    # Shuffling is applied to every dataset, not only to training data.
    shuffled = ds.shuffle(buffer_size=shuffle_buffer_size)
    stream = shuffled.repeat() if TRAINING_DATA else shuffled

    return stream.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [15]:
NUM_EPOCHS = 5
SHUFFLE_BUFFER_SIZE = 1000

# Test split: served as one batch of TEST_BATCH_SIZE records, without caching
# and without repeating.
prepped_test_ds = prepare_for_model(
    resized_normalized_test_ds, TEST_BATCH_SIZE, cache=False, TRAINING_DATA=False)

# Training split: repeated 100 times, shuffled through a bounded buffer, then
# batched and prefetched.
prepped_train_ds = resized_normalized_train_ds.repeat(100).shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
prepped_train_ds = prepped_train_ds.batch(TRAIN_BATCH_SIZE)
prepped_train_ds = prepped_train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation split: shuffle BEFORE repeat and batch by the computed
# VAL_BATCH_SIZE, so every batch is exactly one full pass over the validation
# records. Repeating first and shuffling afterwards would mix the copies, so a
# batch could contain duplicates and miss other records.
prepped_val_ds = resized_normalized_val_ds.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE).repeat(NUM_EPOCHS)
prepped_val_ds = prepped_val_ds.batch(VAL_BATCH_SIZE)
prepped_val_ds = prepped_val_ds.prefetch(buffer_size=AUTOTUNE)

In [16]:
# Passed as `trainable` to the TF Hub feature-vector layer in the model cell;
# False leaves that layer's weights untrained.
FINE_TUNING_CHOICE = False
# Number of units of the final Dense (classification) layer.
NUM_CLASSES = 5
# Height and width of the model input; the channel dimension is appended where
# the input layer is created.
IMAGE_SIZE = (224, 224)

In [17]:
# Classifier: a TF Hub ResNet-v1-101 feature-vector layer followed by a Dense
# softmax head with NUM_CLASSES units.
mdl = tf.keras.Sequential()
mdl.add(tf.keras.layers.InputLayer(input_shape=(*IMAGE_SIZE, 3), name='input_layer'))
mdl.add(hub.KerasLayer(
    "https://tfhub.dev/google/imagenet/resnet_v1_101/feature_vector/4",
    trainable=FINE_TUNING_CHOICE,
    name='resnet_fv',
))
mdl.add(tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name='custom_class'))
# Build with an unspecified batch dimension and 224x224 RGB inputs.
mdl.build([None, 224, 224, 3])

In [18]:
# The model's final Dense layer already applies softmax, so its outputs are
# probabilities, not logits. The loss must therefore use from_logits=False;
# with from_logits=True softmax would effectively be applied a second time.
# Labels are one-hot encoded, hence CategoricalCrossentropy.
# `learning_rate` replaces the deprecated `lr` alias.
mdl.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0.1),
  metrics=['accuracy'])

In [19]:
# Train for NUM_EPOCHS epochs of 100 training batches each. NUM_EPOCHS is the
# same constant that sizes the validation dataset, so the two stay in sync.
# One validation batch is evaluated at the end of every epoch.
mdl.fit(
    prepped_train_ds,
    epochs=NUM_EPOCHS, steps_per_epoch=100,
    validation_data=prepped_val_ds,
    validation_steps=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f17ad637358>