In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import os
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import IPython.display as display
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [None]:
# Record the versions of the libraries this notebook imports so that results
# can be reproduced. Only imported names are referenced here:
# neural_structured_learning (`nsl`) is not imported anywhere in this notebook,
# so printing `nsl.__version__` fails with NameError on a fresh kernel.
print(tf.__version__)
print(hub.__version__)
print(tfds.__version__)

2.1.0
1.3.1


In [None]:
import sys
# Show the path of the Python interpreter backing this kernel.
print(sys.executable)

/usr/bin/python3


In [None]:
from platform import python_version
# Print the version string of the running Python interpreter.
print(python_version())

3.6.9


In [None]:
# Directory that holds the TFRecord shards for every split.
root_dir = './tfrecord-dataset/flowers'


def _shard_pattern(split_name):
    """Return the glob pattern matching all TFRecord shards of one split."""
    return "{}/image_classification_builder-{}*.tfrecord*".format(root_dir, split_name)


train_file_pattern = _shard_pattern("train")
val_file_pattern = _shard_pattern("validation")
test_file_pattern = _shard_pattern("test")

In [None]:
def _list_shards(file_pattern):
    """Glob `file_pattern` and wrap the matching paths in a file-name dataset."""
    matched_paths = tf.io.gfile.glob(file_pattern)
    return tf.data.Dataset.list_files(matched_paths)


# One dataset of shard file names per split.
train_all_files = _list_shards(train_file_pattern)
val_all_files = _list_shards(val_file_pattern)
test_all_files = _list_shards(test_file_pattern)

In [None]:
# Read the serialized records of each split; shards are read in parallel with
# the degree of parallelism left to tf.data (AUTOTUNE).
_reader_options = {"num_parallel_reads": AUTOTUNE}

train_all_ds = tf.data.TFRecordDataset(train_all_files, **_reader_options)
val_all_ds = tf.data.TFRecordDataset(val_all_files, **_reader_options)
test_all_ds = tf.data.TFRecordDataset(test_all_files, **_reader_options)

In [None]:
def _count_records(file_names_ds):
    """Count the serialized records by iterating once over every shard."""
    record_total = 0
    for _ in tf.data.TFRecordDataset(file_names_ds):
        record_total += 1
    return record_total


# Number of examples in each split (requires a full pass over the files).
train_sample_size = _count_records(train_all_files)
validation_sample_size = _count_records(val_all_files)
test_sample_size = _count_records(test_all_files)

In [None]:
# Report the size of every split, one per line. The " \n " separator reproduces
# the spacing print() inserts between positional arguments.
_size_lines = (
    "Sample size for training: {0}".format(train_sample_size),
    "Sample size for validation: {0}".format(validation_sample_size),
    "Sample size for test: {0}".format(test_sample_size),
)
print(" \n ".join(_size_lines))

Sample size for training: 3540 
 Sample size for validation: 80 
 Sample size for test: 50


## Transforming TFRecords for training

In [None]:
def decode_and_resize(serialized_example, image_size=(224, 224), num_classes=5):
    """Decode one serialized ``tf.Example`` into a resized image and one-hot label.

    The image is NOT normalized here: nearest-neighbour resizing keeps the
    decoded JPEG's pixel values, so scaling to [0, 1] is left to `normalize`.

    Args:
        serialized_example: Scalar string tensor holding a serialized
            ``tf.Example`` that contains the feature keys parsed below.
        image_size: Target ``(height, width)`` handed to ``tf.image.resize``.
        num_classes: Depth of the one-hot label vector.

    Returns:
        Tuple ``(resized_image, label_one_hot)``: the 3-channel image resized
        to ``image_size`` and the class label one-hot encoded with
        ``num_classes`` entries.
    """
    scalar_int = tf.io.FixedLenFeature([], tf.int64)
    scalar_str = tf.io.FixedLenFeature([], tf.string)

    # Full record schema; only the encoded image and the integer label are
    # used afterwards.
    parsed_features = tf.io.parse_single_example(
        serialized_example,
        features={
            'image/channels': scalar_int,
            'image/class/label': scalar_int,
            'image/class/text': scalar_str,
            'image/colorspace': scalar_str,
            'image/encoded': scalar_str,
            'image/filename': scalar_str,
            'image/format': scalar_str,
            'image/height': scalar_int,
            'image/width': scalar_int,
        })

    # channels=3 forces RGB output.
    image = tf.io.decode_jpeg(parsed_features['image/encoded'], channels=3)
    label = tf.cast(parsed_features['image/class/label'], tf.int32)
    label_one_hot = tf.one_hot(label, depth=num_classes)
    resized_image = tf.image.resize(image, list(image_size), method='nearest')
    return resized_image, label_one_hot

def normalize(image, label):
    """Scale pixel values from [0, 255] to floats in [0, 1]; pass `label` through.

    Args:
        image: Image tensor with values in [0, 255].
        label: Label tensor, returned unchanged.

    Returns:
        Tuple ``(scaled_image, label)`` where ``scaled_image`` is float32.
    """
    as_float = tf.cast(image, tf.float32)
    scaled_image = as_float / 255.
    return scaled_image, label

In [None]:
# Both mapping stages run in parallel, with the parallelism chosen by tf.data.
_map_options = {"num_parallel_calls": AUTOTUNE}

# Training split: decode + resize, then scale to [0, 1].
resized_train_ds = train_all_ds.map(decode_and_resize, **_map_options)
resized_normalized_train_ds = resized_train_ds.map(normalize, **_map_options)

# Validation split.
resized_val_ds = val_all_ds.map(decode_and_resize, **_map_options)
resized_normalized_val_ds = resized_val_ds.map(normalize, **_map_options)

# Test split.
resized_test_ds = test_all_ds.map(decode_and_resize, **_map_options)
resized_normalized_test_ds = resized_test_ds.map(normalize, **_map_options)

In [None]:
# Edge length, in pixels, of the square model input.
pixels =224
# (height, width) of the model input; the channel axis is appended where needed.
IMAGE_SIZE = (pixels, pixels)
# Mini-batch size used for training.
TRAIN_BATCH_SIZE = 32
# Validation and test batches are sized to hold their entire split at once.
VAL_BATCH_SIZE = validation_sample_size
TEST_BATCH_SIZE = test_sample_size

In [None]:
NUM_EPOCHS = 5
SHUFFLE_BUFFER_SIZE = 1000
# Upper bound on the number of passes over the training split that the
# pipeline can serve.
TRAIN_REPEAT_COUNT = 100

# Test: a single batch holding the whole split, in file order.
prepped_test_ds = resized_normalized_test_ds.batch(TEST_BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

# Train: shuffle BEFORE repeat so every example of one pass is emitted before
# the next pass starts; repeating first would let the shuffle buffer mix
# examples from different passes.
prepped_train_ds = (
    resized_normalized_train_ds
    .shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
    .repeat(TRAIN_REPEAT_COUNT)
    .batch(TRAIN_BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation: no shuffling, and batch BEFORE repeat, so every batch is exactly
# one full pass over the validation split (no duplicated or missing samples).
# One such batch is available per training epoch.
prepped_val_ds = (
    resized_normalized_val_ds
    .batch(VAL_BATCH_SIZE)
    .repeat(NUM_EPOCHS)
    .prefetch(buffer_size=AUTOTUNE)
)


In [None]:
def _first_batch(dataset):
    """Return the first element produced by `dataset`."""
    return next(iter(dataset))


# Pull one (images, labels) batch from each pipeline for inspection.
train_image_batch, train_label_batch = _first_batch(prepped_train_ds)
val_image_batch, val_label_batch = _first_batch(prepped_val_ds)
test_image_batch, test_label_batch = _first_batch(prepped_test_ds)

Each image batch should now have shape `[N, 224, 224, 3]` and each label batch shape `[N, 5]`, where `N` is the batch size.

In [None]:
# Parsing spec for one serialized tf.Example: every feature is a scalar,
# either an int64 or a string.
_scalar_int64 = tf.io.FixedLenFeature([], tf.int64)
_scalar_string = tf.io.FixedLenFeature([], tf.string)

feature_description = {
    'image/channels': _scalar_int64,
    'image/class/label': _scalar_int64,
    'image/class/text': _scalar_string,
    'image/colorspace': _scalar_string,
    'image/encoded': _scalar_string,
    'image/filename': _scalar_string,
    'image/format': _scalar_string,
    'image/height': _scalar_int64,
    'image/width': _scalar_int64,
}


def _parse_function(example_proto):
  """Parse one serialized `tf.Example` into a dict of feature tensors.

  The keys and dtypes of the result are given by `feature_description`.
  """
  parsed_example = tf.io.parse_single_example(
      serialized=example_proto, features=feature_description)
  return parsed_example

parsd_ds = train_all_ds.map(_parse_function)

# Build the class-index -> class-name lookup from 100 randomly drawn training
# records; the first name seen for an index is kept.
label_map = {}
_sampled_records = parsd_ds.shuffle(1024).take(100)
for image_features in _sampled_records:
    label_idx = image_features['image/class/label'].numpy()
    label_str = image_features['image/class/text'].numpy().decode()
    label_map.setdefault(label_idx, label_str)

In [None]:
# Display the class-index -> class-name mapping.
label_map

{0: 'roses', 1: 'sunflowers', 2: 'daisy', 3: 'dandelion', 4: 'tulips'}

In [None]:
# Passed as `trainable` to the TF Hub feature extractor; False leaves its
# weights non-trainable.
FINE_TUNING_CHOICE = False
# Number of units in the final classification layer (one per flower class).
NUM_CLASSES = 5

Define L1 and L2 regularizers:

In [None]:
# L2 penalty (factor 0.01), used as the classification layer's kernel regularizer.
KERNEL_REGULARIZER = tf.keras.regularizers.l2(l=0.01)
# Combined L1 + L2 penalty (factor 0.01 each), used as the classification
# layer's activity regularizer.
ACTIVITY_REGULARIZER = tf.keras.regularizers.L1L2(l1=0.01,l2=0.01)

Pass regularizers into model architecture:

In [None]:
# Assemble the classifier layer by layer: input -> ResNet-v2-50 feature
# extractor from TF Hub -> regularized softmax classification layer.
mdl = tf.keras.Sequential()
mdl.add(tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)))
mdl.add(
    hub.KerasLayer(
        "https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4",
        trainable=FINE_TUNING_CHOICE,
    )
)
mdl.add(
    tf.keras.layers.Dense(
        NUM_CLASSES,
        activation='softmax',
        kernel_regularizer=KERNEL_REGULARIZER,
        activity_regularizer=ACTIVITY_REGULARIZER,
        name='custom_class',
    )
)
# Fix the input shape (batch dimension left open) so the weights are created.
mdl.build([None, 224, 224, 3])

In [None]:
# Print the layer-by-layer summary with trainable / non-trainable parameter counts.
mdl.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_2 (KerasLayer)   (None, 2048)              23564800  
_________________________________________________________________
custom_class (Dense)         (None, 5)                 10245     
Total params: 23,575,045
Trainable params: 10,245
Non-trainable params: 23,564,800
_________________________________________________________________


In [None]:
# The classification layer already applies softmax, so the loss receives
# probabilities, not logits: from_logits must be False. With True, softmax
# would be applied a second time inside the loss.
mdl.compile(
    # `learning_rate` is the current argument name; `lr` is a deprecated alias.
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0.1),
    metrics=['accuracy'])

In [None]:
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        # Save a checkpoint at the end of every epoch. The `{epoch}`
        # placeholder in the path puts the epoch number in the saved
        # model's name, so each epoch is written to its own location
        # (mymodel_1, mymodel_2, ...) instead of overwriting the
        # previous checkpoint.
        filepath="mymodel_{epoch}",
        save_best_only=False,  # False: save after every epoch, not only on improvement.
        # NOTE(review): `monitor` presumably only matters when
        # save_best_only=True; with False it should have no effect - confirm.
        monitor="val_accuracy",
        verbose=1,
    )
]

In [None]:
# Display the training dataset object to inspect its element shapes and dtypes.
prepped_train_ds

<PrefetchDataset shapes: ((None, 224, 224, 3), (None, 5)), types: (tf.float32, tf.float32)>

`prepped_train_ds` is a `PrefetchDataset` whose elements are `(image_batch, label_batch)` tuples.

In [None]:
import time

In [None]:
# Train the model and measure the wall-clock duration of the run.
tic = time.time()
mdl.fit(
    prepped_train_ds,
    # Use the shared constant: the validation pipeline is sized with
    # NUM_EPOCHS, so the epoch count here must stay in step with it.
    epochs=NUM_EPOCHS,
    steps_per_epoch=100,
    validation_data=prepped_val_ds,
    # One validation batch is evaluated per epoch.
    validation_steps=1,
    callbacks=callbacks)
toc = time.time()
print('Total training time with regularization in second: ', toc-tic)

Train for 100 steps, validate for 1 steps
Epoch 1/5
Epoch 00001: saving model to mymodel_1
INFO:tensorflow:Assets written to: mymodel_1/assets


INFO:tensorflow:Assets written to: mymodel_1/assets


Epoch 2/5
Epoch 00002: saving model to mymodel_2
INFO:tensorflow:Assets written to: mymodel_2/assets


INFO:tensorflow:Assets written to: mymodel_2/assets


Epoch 3/5
Epoch 00003: saving model to mymodel_3
INFO:tensorflow:Assets written to: mymodel_3/assets


INFO:tensorflow:Assets written to: mymodel_3/assets


Epoch 4/5
Epoch 00004: saving model to mymodel_4
INFO:tensorflow:Assets written to: mymodel_4/assets


INFO:tensorflow:Assets written to: mymodel_4/assets


Epoch 5/5
Epoch 00005: saving model to mymodel_5
INFO:tensorflow:Assets written to: mymodel_5/assets


INFO:tensorflow:Assets written to: mymodel_5/assets


Total training time with regularization in second:  76.6740140914917


Training process with L1 and L2 regularization is completed. 