<a href="https://colab.research.google.com/github/lmoroney/tfbook/blob/master/chapter4/parallelized-data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

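It is good practice to confirm that you are running TensorFlow 2.x before continuing (for example, by printing `tf.__version__` once TensorFlow has been imported).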

# Create the Model Architecture


In [2]:
import multiprocessing
import os

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_datasets as tfds

def create_model():
  """Build and compile the convolutional binary classifier.

  The network takes 300x300 RGB images and applies five Conv2D (3x3, ReLU) +
  MaxPooling2D (2x2) stages with 16, 32, 64, 64 and 64 filters, followed by
  Flatten, a 512-unit ReLU Dense layer and a single sigmoid output unit.

  Returns:
    A ``tf.keras`` Sequential model compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric.
  """
  layers = tf.keras.layers

  # The first convolution also declares the expected input shape.
  stack = [
      layers.Conv2D(16, (3,3), activation='relu', input_shape=(300, 300, 3)),
      layers.MaxPooling2D(2, 2),
  ]

  # Remaining conv/pool stages, in the same order as the filter counts listed.
  for filters in (32, 64, 64, 64):
    stack.append(layers.Conv2D(filters, (3,3), activation='relu'))
    stack.append(layers.MaxPooling2D(2, 2))

  # Classifier head.
  stack.extend([
      layers.Flatten(),
      layers.Dense(512, activation='relu'),
      layers.Dense(1, activation='sigmoid'),
  ])

  model = tf.keras.models.Sequential(stack)
  model.compile(
      optimizer='Adam',
      loss='binary_crossentropy',
      metrics=['accuracy'],
  )
  return model

# EXTRACT


In [3]:
# Load the 'train' split of cats_vs_dogs through TFDS; the cells below read the
# prepared TFRecord shards from disk directly.
# NOTE(review): with_info=True makes tfds.load return a (dataset, info) tuple, so
# train_data is a tuple here rather than a Dataset. It is not referenced again in
# the visible cells — confirm before relying on it.
train_data = tfds.load('cats_vs_dogs', split='train', with_info=True)


In [5]:
# Glob pattern for the prepared training shards. The path assumes TFDS's default
# data directory under the user's home. "~" is expanded explicitly here because
# the pattern is otherwise matched literally by TensorFlow's file system layer
# (no shell-style home-directory expansion).
# NOTE(review): the "4.0.0" version directory is hard-coded; confirm it matches
# the dataset version that tfds.load() actually prepared.
file_pattern = os.path.expanduser(
    '~/tensorflow_datasets/cats_vs_dogs/4.0.0/cats_vs_dogs-train.tfrecord*')

# Dataset of shard file names matching the pattern.
files = tf.data.Dataset.list_files(file_pattern)

In [6]:
# Open the shard files as TFRecord datasets and interleave their records into a
# single stream, pulling from 4 files at a time with an auto-tuned level of
# parallelism.
train_dataset = files.interleave(
    tf.data.TFRecordDataset,
    cycle_length=4,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)


# TRANSFORM


In [7]:
def read_tfrecord(serialized_example):
  """Parse one serialized record into an ``(image, label)`` pair.

  Args:
    serialized_example: A scalar string tensor holding one serialized example
      with an ``image`` (encoded bytes) and a ``label`` (int64) feature.

  Returns:
    A tuple ``(image, label)``. The image is decoded as a 3-channel JPEG, cast
    to float32, divided by 255 and resized to 300x300. The label is the parsed
    int64 feature, falling back to the declared default of ``-1``.
  """
  schema = {
      "image": tf.io.FixedLenFeature((), tf.string, ""),
      "label": tf.io.FixedLenFeature((), tf.int64, -1),
  }
  parsed = tf.io.parse_single_example(serialized_example, schema)

  pixels = tf.io.decode_jpeg(parsed['image'], channels=3)
  # Convert to float32 and scale by 1/255 before resizing to the fixed size.
  scaled = tf.cast(pixels, tf.float32) / 255
  resized = tf.image.resize(scaled, (300,300))

  return resized, parsed['label']

In [8]:
# Use the CPU count reported by the OS as the number of parallel map calls.
cores = multiprocessing.cpu_count()
print(cores)

# Decode and preprocess every record with read_tfrecord.
# NOTE: train_dataset is rebound from itself, so re-running this cell without
# first re-running the interleave cell would apply read_tfrecord to elements
# that are already parsed.
train_dataset = train_dataset.map(
    read_tfrecord,
    num_parallel_calls=cores,
)

# Optional (left disabled): cache the parsed examples.
#train_dataset = train_dataset.cache()

16


# LOAD

In [9]:
# Shuffle with a 1024-element buffer, group into batches of 32, then prefetch
# with an auto-tuned buffer size.
train_dataset = (
    train_dataset
    .shuffle(1024)
    .batch(32)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [10]:
# Build a freshly compiled model and train it on the input pipeline for up to
# 10 epochs, showing per-epoch progress.
model = create_model()
model.fit(
    train_dataset,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 