In [1]:
import numpy as np
import tensorflow as tf
tf.enable_eager_execution()
tf.__version__

'1.14.0'

## Parameters

In [2]:
import pathlib
#img_path = "/home/Data/CharactersTrimPad28/"
img_path = "./s3mnt/ChineseNumbers/"
img_path = "/home/Data/ChineseNumbers/"
data_root = pathlib.Path(img_path)
AUTOTUNE = tf.data.experimental.AUTOTUNE
NUM_EPOCHS = 1
BATCH_SIZE = 32

## Mapping function

In [3]:
def preprocess_image(image):
  image = tf.image.decode_image(image, channels=3)
  image = tf.cast(image, tf.float32)
  image /= 255.0  # normalize to [0,1] range
  return image

def load_and_preprocess_image(path):
  image = tf.read_file(path)
  return preprocess_image(image)

# The tuples are unpacked into the positional arguments of the mapped function
def load_and_preprocess_from_path_label(path, label):
  return load_and_preprocess_image(path), label

## Generate paths and labels

In [4]:
# tf.data.Dataset.from_tensor_slices
all_image_paths = [str(path) for path in list(data_root.glob('*/*'))]
for i in range(10):
    print(all_image_paths[i])

label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
label_to_index = dict((name, index) for index,name in enumerate(label_names))
all_image_labels = [label_to_index[pathlib.Path(path).parent.name]
                    for path in all_image_paths]

image_count = len(all_image_paths)

/home/Data/ChineseNumbers/二/Han yi Cu yuan ti Font-Traditional Chinese ttf.png
/home/Data/ChineseNumbers/二/JiaShang Liu Xing kai 5500 Font- Simplified Chinesettf.png
/home/Data/ChineseNumbers/二/Japan hengshan writing brush Font-Traditional Chinesettf.png
/home/Data/ChineseNumbers/二/Classic Cu hei Fontttf.png
/home/Data/ChineseNumbers/二/Chinese New Year(DFGirl-dospy-fei) font-Simplified Chinesettf.png
/home/Data/ChineseNumbers/二/Han yi Fang die Fontttf.png
/home/Data/ChineseNumbers/二/Classic Kong die hei Fontttf.png
/home/Data/ChineseNumbers/二/Childhood amusement park Font-Simplified Chinesettf.png
/home/Data/ChineseNumbers/二/Snow World  Butterfly Font-Simplified Chinesettf.png
/home/Data/ChineseNumbers/二/Hypocrite Youth v 20 Font-Simplified ChineseTTF.png


## Test iterate time

In [5]:
import time

def timeit(dataset):
    overall_start = time.time()
    n_image = 0

    start = time.time()
    for n_batch,(images,labels) in enumerate(dataset):
        n_image += int(images.shape[0])
        if n_image%100 == 0:
            #print('.',end='')
            #print("\r{} images in {} batches with BATCH_SIZE {}: {:.2f} s".format(n_image, n_batch, BATCH_SIZE, time.time()-start), end='', flush=True)
            print("\r{} images: {:.2f} s".format(n_image, time.time()-start), end='', flush=True)
    print()
    end = time.time()
    duration = end-start
    
    print("{} images: {} s".format(n_image, duration))
    print("{:0.5f} Images/s".format(n_image/float(duration)))
    print("Total time: {}s".format(end-overall_start))

## Input pipeline experiment

### 1. Original pipeline

In [6]:
# Extract
path_label_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
path_label_ds = path_label_ds.shuffle(buffer_size=image_count)
path_label_ds = path_label_ds.repeat(NUM_EPOCHS)

image_label_ds = path_label_ds.map(load_and_preprocess_from_path_label)
image_label_ds = image_label_ds.batch(BATCH_SIZE)

# Load
timeit(image_label_ds)

12000 images: 3.40 s
12607 images: 3.565103530883789 s
3536.22269 Images/s
Total time: 3.5651044845581055s


### 2. Map with num_parallel_calls

In [7]:
# Extract
path_label_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
path_label_ds = path_label_ds.shuffle(buffer_size=image_count)
path_label_ds = path_label_ds.repeat(NUM_EPOCHS)
path_label_ds = path_label_ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=image_count, count=NUM_EPOCHS))


image_label_ds = path_label_ds.map(load_and_preprocess_from_path_label, num_parallel_calls=4)
image_label_ds = image_label_ds.batch(BATCH_SIZE)

# Load
timeit(image_label_ds)

W0703 04:53:54.214915 140646928393984 deprecation.py:323] From <ipython-input-7-c80bf333129f>:7: shuffle_and_repeat (from tensorflow.python.data.experimental.ops.shuffle_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.Dataset.shuffle(buffer_size, seed)` followed by `tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take care of using the fused implementation.


12000 images: 1.43 s
12607 images: 1.497429370880127 s
8419.09491 Images/s
Total time: 1.4974303245544434s


### 3. tf.data.experimental.shuffle_and_repeat

See more about [tf.data.experimental.shuffle_and_repeat](https://www.tensorflow.org/api_docs/python/tf/data/experimental/shuffle_and_repeat)
```
tf.data.experimental.shuffle_and_repeat(
    buffer_size,
    count=None,
    seed=None
)
```

In [8]:
# Extract
path_label_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
path_label_ds = path_label_ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=image_count, count=NUM_EPOCHS))


image_label_ds = path_label_ds.map(load_and_preprocess_from_path_label, num_parallel_calls=4)
image_label_ds = image_label_ds.batch(BATCH_SIZE)

# Load
timeit(image_label_ds)

12000 images: 1.37 s
12607 images: 1.4445338249206543 s
8727.38304 Images/s
Total time: 1.4445347785949707s


### 4. tf.data.experimental.map_and_batch

See more about [tf.data.experimental.map_and_batch](https://www.tensorflow.org/api_docs/python/tf/data/experimental/map_and_batch)
```
tf.data.experimental.map_and_batch(
    map_func,
    batch_size,
    num_parallel_batches=None,
    drop_remainder=False,
    num_parallel_calls=None
)
```

In [9]:
# Extract
path_label_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
path_label_ds = path_label_ds.shuffle(buffer_size=image_count)
path_label_ds = path_label_ds.repeat(NUM_EPOCHS)

image_label_ds = path_label_ds.apply(tf.data.experimental.map_and_batch(load_and_preprocess_from_path_label, BATCH_SIZE, num_parallel_calls=4))

# Load
timeit(image_label_ds)

W0703 04:53:57.516138 140646928393984 deprecation.py:323] From <ipython-input-9-a731ffa1e19b>:8: map_and_batch (from tensorflow.python.data.experimental.ops.batching) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.data.Dataset.map(map_func, num_parallel_calls)` followed by `tf.data.Dataset.batch(batch_size, drop_remainder)`. Static tf.data optimizations will take care of using the fused implementation.


12000 images: 1.38 s
12607 images: 1.4397857189178467 s
8756.16408 Images/s
Total time: 1.439786434173584s


### 5. shuffle_and_repeat + map_and_batch

In [10]:
# Extract
path_label_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
path_label_ds = path_label_ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=image_count, count=NUM_EPOCHS))
image_label_ds = path_label_ds.apply(tf.data.experimental.map_and_batch(load_and_preprocess_from_path_label, BATCH_SIZE, num_parallel_calls=4))

# Load
timeit(image_label_ds)

12000 images: 1.40 s
12607 images: 1.4625129699707031 s
8620.09449 Images/s
Total time: 1.4625139236450195s


### 6. Prefetch

See more about [tf.data.experimental.prefetch_to_device](https://www.tensorflow.org/api_docs/python/tf/data/experimental/prefetch_to_device)
```
tf.data.experimental.prefetch_to_device(
    device,
    buffer_size=None
)
```

In [11]:
# Extract
path_label_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
path_label_ds = path_label_ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=image_count, count=NUM_EPOCHS))

image_label_ds = path_label_ds.map(load_and_preprocess_from_path_label, num_parallel_calls=4)
image_label_ds = image_label_ds.batch(BATCH_SIZE)

# Load
image_label_ds = image_label_ds.prefetch(buffer_size=1) # Only on CPU
#image_label_ds = image_label_ds.apply(tf.data.experimental.prefetch_to_device(device="/gpu:0", buffer_size=1)) # Must be final Dataset in input pipeline
timeit(image_label_ds)

12000 images: 1.39 s
12607 images: 1.4531762599945068 s
8675.47891 Images/s
Total time: 1.4531772136688232s


### 7. Cache

See more about [tf.data.Dataset.cache](https://www.tensorflow.org/tutorials/load_data/images#cache)
                                       
Use tf.data.Dataset.cache to easily cache calculations across epochs. This is especially performant if the dataq fits in memory
```
ds = image_label_ds.cache()
```

One disadvantage to using an in memory cache is that the cache must be rebuilt on each run, giving the same startup delay each time the dataset is started:
If the data doesn't fit in memory, use a cache file. 
The cache file also has the advantage that it can be used to quickly restart the dataset without rebuilding the cache. Note how much faster it is the second time:


```
ds = image_label_ds.cache(filename='./cache.tf-data')
```

In [12]:
# Extract
ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=image_count, count=NUM_EPOCHS))
#path_label_ds = path_label_ds.cache(filename='./cache.tf-path')

ds = ds.map(load_and_preprocess_from_path_label, num_parallel_calls=4)
ds = ds.batch(BATCH_SIZE)

# Load
ds = ds.cache(filename='./cache.tf-ds')
ds = ds.prefetch(buffer_size=1) # Only on CPU
#image_label_ds = image_label_ds.apply(tf.data.experimental.prefetch_to_device(device="/gpu:0", buffer_size=1)) # Must be final Dataset in input pipeline
timeit(ds)

12000 images: 0.15 s
12607 images: 0.1553668975830078 s
81143.41083 Images/s
Total time: 0.15536808967590332s


In [13]:
# Extract
path_label_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))

# Transform
path_label_ds = path_label_ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=image_count, count=NUM_EPOCHS))
path_label_ds = path_label_ds.cache(filename='./cache.tf-path')

image_label_ds = path_label_ds.map(load_and_preprocess_from_path_label, num_parallel_calls=4)
image_label_ds = image_label_ds.batch(BATCH_SIZE)

# Load
image_label_ds = image_label_ds.cache(filename='./cache.tf-image')
image_label_ds = image_label_ds.prefetch(buffer_size=10) # Only on CPU
#image_label_ds = image_label_ds.apply(tf.data.experimental.prefetch_to_device(device="/gpu:0", buffer_size=1)) # Must be final Dataset in input pipeline
timeit(image_label_ds)

12000 images: 0.14 s
12607 images: 0.14266276359558105 s
88369.24003 Images/s
Total time: 0.14266395568847656s
