In [1]:
import numpy as np
import imgaug as ia
from imgaug import augmenters as iaa
import time
from imgaug.augmentables.batches import UnnormalizedBatch
import pandas as pd
import cProfile
%matplotlib inline

# Augmentation pipeline shared by every benchmark in this notebook.
# The per-step cost notes explain why the whole pipeline is slow enough
# to make the multicore comparison meaningful.
aug = iaa.Sequential(
    [
        iaa.PiecewiseAffine(scale=0.05, nb_cols=6, nb_rows=6),  # very slow step
        iaa.Fliplr(0.5),  # very fast step
        iaa.CropAndPad(px=(-10, 10)),  # very fast step
    ]
)

In [2]:
def do_tests(BATCH_SIZE, NB_BATCHES):
    """Time four ways of running ``aug`` over the same amount of image data.

    Builds ``NB_BATCHES`` batches, each wrapping the same list of
    ``BATCH_SIZE`` copies of a 256x256 quokka image, and measures how long
    each of the following strategies takes:

    * ``'sequential'``       -- ``aug.augment_image`` called once per image
    * ``'batches_no_multi'`` -- ``aug.augment_batches(batches, background=False)``
    * ``'batches_multi'``    -- ``aug.augment_batches(batches, background=True)``
    * ``'batches_pool'``     -- ``aug.pool(...)`` with ``pool.map_batches``;
      the measured time includes creating and closing the pool

    Parameters
    ----------
    BATCH_SIZE : int
        Number of images per batch.
    NB_BATCHES : int
        Number of batches.

    Returns
    -------
    dict
        Maps each strategy name listed above to its elapsed time in
        milliseconds (float).
    """
    image = ia.quokka_square(size=(256, 256))
    images = [np.copy(image) for _ in range(BATCH_SIZE)]
    # Note: every batch references the same `images` list object.
    batches = [UnnormalizedBatch(images=images) for _ in range(NB_BATCHES)]

    def _elapsed_ms(run):
        """Call ``run()`` once and return its duration in milliseconds."""
        # perf_counter() is monotonic and high resolution, so the measured
        # interval cannot be skewed by system clock adjustments the way
        # time.time() can.
        start = time.perf_counter()
        run()
        return (time.perf_counter() - start) * 10 ** 3

    def _one_image_at_a_time():
        # Same total number of augmentations as the batched variants, but
        # each image goes through its own augment_image() call. The results
        # are collected into nested lists so the work matches a real use.
        return [
            [aug.augment_image(image) for _ in range(BATCH_SIZE)]
            for _ in range(NB_BATCHES)
        ]

    def _via_pool():
        # Pool start-up and shutdown happen inside the timed region.
        with aug.pool(processes=-1, maxtasksperchild=20, seed=1) as pool:
            return pool.map_batches(batches)

    results = {}

    ## sequential, one image at a time
    results['sequential'] = _elapsed_ms(_one_image_at_a_time)

    ## batches, no background processes
    # list() drains the generator so the augmentation actually runs.
    results['batches_no_multi'] = _elapsed_ms(
        lambda: list(aug.augment_batches(batches, background=False))
    )

    ## batches, augmented in background processes (multicore)
    results['batches_multi'] = _elapsed_ms(
        lambda: list(aug.augment_batches(batches, background=True))
    )

    ## batches, explicit multiprocessing pool
    results['batches_pool'] = _elapsed_ms(_via_pool)

    return results

# Experiment 1: 20 batches of 16 images each (320 images in total)

In [3]:
# Run the benchmark and show one row per strategy, with the time in ms.
results = do_tests(BATCH_SIZE=16, NB_BATCHES=20)
pd.DataFrame.from_dict(
    results,
    orient='index',
    columns=['ms'],
)

Unnamed: 0,ms
sequential,24045.284986
batches_no_multi,21913.042784
batches_multi,8320.817709
batches_pool,8527.624607


# Experiment 2: 320 batches of 1 image each (320 images in total)

In [4]:
# Run the benchmark and show one row per strategy, with the time in ms.
results = do_tests(BATCH_SIZE=1, NB_BATCHES=320)
pd.DataFrame.from_dict(
    results,
    orient='index',
    columns=['ms'],
)

Unnamed: 0,ms
sequential,23388.743877
batches_no_multi,22255.701542
batches_multi,8496.857882
batches_pool,8103.905439
