In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

## Single-GPU strategy

In [2]:
# Use the first visible GPU; fall back to the CPU so the notebook still runs
# on machines without an accelerator (devices[0] would raise IndexError there).
devices = tf.config.list_physical_devices('GPU')
if devices:
    try:
        # Allocate GPU memory on demand instead of reserving it all up front.
        tf.config.experimental.set_memory_growth(devices[0], True)
    except RuntimeError as err:
        # Memory growth can only be set before the GPU has been initialised,
        # e.g. this happens when the cell is re-run in a live kernel.
        print(err)
    gpu_name = "GPU:0"
    print(devices[0])
else:
    gpu_name = "CPU:0"
    print("No GPU found, falling back to CPU:0")

# A single device is used, hence OneDeviceStrategy.
# Switch to tf.distribute.MirroredStrategy when several GPUs are available.
strategy = tf.distribute.OneDeviceStrategy(device=gpu_name)

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


## Get VOC 2012 dataset

In [3]:
# Load VOC 2012 and carve its 'train' split into three slices:
# first 80% for training, next 10% for validation, last 10% for testing.
splits = ['train[:80%]', 'train[80%:90%]', 'train[90%:]']

(train_examples, validation_examples, test_examples), info = tfds.load(
    name='voc/2012',
    split=splits,
    with_info=True,
)

In [4]:
# Display the feature schema reported by the dataset metadata.
info.features

FeaturesDict({
    'image': Image(shape=(None, None, 3), dtype=tf.uint8),
    'image/filename': Text(shape=(), dtype=tf.string),
    'labels': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=20)),
    'labels_no_difficult': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=20)),
    'objects': Sequence({
        'bbox': BBoxFeature(shape=(4,), dtype=tf.float32),
        'is_difficult': tf.bool,
        'is_truncated': tf.bool,
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=20),
        'pose': ClassLabel(shape=(), dtype=tf.int64, num_classes=5),
    }),
})

In [5]:
# Read dataset statistics from the metadata.
# NOTE: num_examples is the size of the whole 'train' split, i.e. before it is
# sliced into the train/validation/test subsets requested from tfds.load.
num_classes = info.features['labels'].num_classes
num_examples = info.splits['train'].num_examples
print(f"Number of train examples: {num_examples}, number of labels: {num_classes}")

Number of train examples: 5717, number of labels: 20


In [13]:
# Resize the image and scale its pixel values.
@tf.function
def format_image(tensor):
    """Resize an example's image to IMAGE_SIZE and divide its pixels by 255.

    Args:
        tensor: feature dict of a single example; its 'image' and 'objects'
            entries are read.

    Returns:
        A tuple (image, objects): the resized, rescaled image and the
        example's 'objects' entry passed through unchanged.
    """
    resized = tf.image.resize(tensor['image'], IMAGE_SIZE)
    return resized / 255.0, tensor['objects']

In [14]:
# Input pipeline and training settings.
IMAGE_SIZE = (224, 224)  # target size handed to tf.image.resize in format_image
EPOCHS = 10
BUFFER_SIZE = num_examples

# The global batch is split across replicas, so it scales with the replica count.
BATCH_SIZE_PER_REPLICA = 64
global_batch_size = strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA

In [15]:
# Prepare batches.
# Every image carries a different number of objects, so the 'objects' tensors
# returned by format_image differ in shape between examples and plain `batch`
# cannot stack them. `padded_batch` pads each component to the longest example
# in the batch (default padding: 0 / False), which requires TF >= 2.2 when no
# padded_shapes are given.
# NOTE: padded label entries are 0, which is also a valid class id; padded rows
# can be told apart by their all-zero bbox.
train_batches = (
    train_examples
    .shuffle(num_examples // 4)
    .map(format_image)
    .padded_batch(global_batch_size)
    .prefetch(1)
)
validation_batches = (
    validation_examples
    .map(format_image)
    .padded_batch(global_batch_size)
    .prefetch(1)
)
# A batch of one never mixes shapes, so plain `batch` is sufficient here.
test_batches = test_examples.map(format_image).batch(1)

In [19]:
# Sanity check of format_image on a single training example.
# Only the third element is pulled from the dataset; building a Python list of
# the whole dataset would decode and hold every image in memory for nothing.
test_image = next(iter(train_examples.skip(2).take(1)))
test_result = format_image(test_image)

print(f"Image of shapes: {test_result[0].shape}")
print(f"Boxes with localization(s):\n {test_result[1]['bbox']}")
print(f"and labels: {test_result[1]['label']}")

# Drop the temporaries so they do not linger in the kernel namespace.
del test_image, test_result

Image of shapes: (224, 224, 3)
Boxes with localization(s):
 [[0.26426426 0.168      0.9429429  0.688     ]
 [0.27327326 0.002      0.96997    0.584     ]
 [0.18618618 0.268      0.4234234  0.426     ]
 [0.23423423 0.354      0.5495495  0.526     ]]
and labels: [12 12 14 14]


## Distribute the datasets

In [20]:
def distribute_datasets(strategy, train_batches, validation_batches, test_batches):
    """Wrap the three batched datasets for use under a distribution strategy.

    Args:
        strategy: object exposing `experimental_distribute_dataset`.
        train_batches: batched training dataset.
        validation_batches: batched validation dataset.
        test_batches: batched test dataset.

    Returns:
        A 3-tuple (train, validation, test) holding the result of
        `strategy.experimental_distribute_dataset` for each input, in order.
    """
    return tuple(
        strategy.experimental_distribute_dataset(batches)
        for batches in (train_batches, validation_batches, test_batches)
    )

In [21]:
# Distribute the three pipelines with the strategy created earlier and show
# the resulting dataset type.
train_dist_dataset, val_dist_dataset, test_dist_dataset = distribute_datasets(
    strategy,
    train_batches,
    validation_batches,
    test_batches,
)
print(type(train_dist_dataset))

<class 'tensorflow.python.distribute.input_lib.DistributedDataset'>
