In [1]:
from torchvision import datasets
import torch
from imgaug import augmenters as iaa
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Getting dataset

In [2]:
# Load the FashionMNIST training split, downloading it into `data_folder`
# if it is not already present there.
data_folder = "../Chapter3/dataset/"
fmnist = datasets.FashionMNIST(root=data_folder, train=True, download=True)
# Images and their labels, as exposed by the dataset object.
tr_images = fmnist.data
tr_targets = fmnist.targets

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# Comparison: per-image vs. batch augmentation

In [3]:
def tensor_to_numpy(tensor):
    """Return `tensor` as a NumPy array.

    The tensor is first moved to the CPU and detached from the autograd
    graph, since `.numpy()` is called on the result of those two steps.
    """
    host_tensor = tensor.cpu()
    detached = host_tensor.detach()
    return detached.numpy()

In [4]:
# Augmentation pipeline with a single step: an affine translation along the
# x axis. Per imgaug's parameter conventions the (-10, 10) tuple is a range
# to sample the pixel shift from, and mode='constant' selects constant-value
# fill for pixels uncovered by the shift.
aug = iaa.Sequential(
    [iaa.Affine(mode='constant', translate_px={'x': (-10, 10)})]
)

**Scenario 1:** Augmenting 32 images, one at a time:

In [5]:
%%time
# Baseline: call the augmenter once per image for the first 32 images.
# The augmented result is discarded; only the elapsed time is of interest.
for i in range(32):
    aug.augment_image(tensor_to_numpy(tr_images[i]))

Wall time: 27.9 ms


**Scenario 2:** Augmenting 32 images as a batch in one go:

In [6]:
%%time
# Same 32 images, but converted to one NumPy array and handed to the
# augmenter in a single call; the augmented batch is kept in `x`.
x = aug.augment_images(tensor_to_numpy(tr_images[:32]))

Wall time: 13 ms


# Batch augmentation in a torch Dataset

In [7]:
def tensor_list_to_numpy(tensor_list):
    """Convert every tensor in `tensor_list` via `tensor_to_numpy`.

    Returns a new list holding the converted items in the same order.
    """
    return [tensor_to_numpy(item) for item in tensor_list]

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

class FMNISTDataset(Dataset):
    """Image/label dataset that augments and collates one whole batch at a time.

    `__getitem__` returns the raw, un-augmented pair. Augmentation, stacking,
    scaling and the move to `device` all happen in `collate_fn`, so that
    method must be passed to the `DataLoader` as its `collate_fn`.

    Args:
        x: Indexable collection of 2-D images (tensors or NumPy arrays).
        y: Indexable collection of labels aligned with `x`.
        aug: Optional augmenter exposing `augment_images(images=...)`.
            `None` (the default) disables augmentation.
    """

    def __init__(self, x, y, aug=None):
        self.x, self.y = x, y
        self.aug = aug

    def __getitem__(self, ix):
        """Return the un-augmented `(image, label)` pair stored at index `ix`."""
        x, y = self.x[ix], self.y[ix]
        return x, y

    def __len__(self):
        """Number of samples, taken from the length of `x`."""
        return len(self.x)

    @staticmethod
    def _stack_images(ims):
        """Combine a sequence of equally shaped images into one tensor."""
        if all(torch.is_tensor(im) for im in ims):
            # torch.tensor() cannot build a tensor from multi-element tensors,
            # so tensors are stacked directly.
            return torch.stack(tuple(ims))
        # Merge into a single ndarray first: converting a list of ndarrays
        # element by element is the slow path PyTorch warns about.
        return torch.as_tensor(np.asarray(ims))

    def collate_fn(self, batch):
        """Turn a list of `(image, label)` samples into a batch on `device`.

        Args:
            batch: Non-empty list of pairs as produced by `__getitem__`.

        Returns:
            `(ims, classes)`: `ims` has a singleton channel axis inserted
            after the batch axis and is divided by 255 (assumes 8-bit pixel
            values -- TODO confirm for other inputs); `classes` is the label
            tensor. Both are moved to `device`.
        """
        ims, classes = list(zip(*batch))
        # Explicit None check: whether to augment should not depend on the
        # truthiness of the augmenter object.
        if self.aug is not None:
            # One call for the whole batch instead of one call per image.
            ims = self.aug.augment_images(images=tensor_list_to_numpy(ims))
        ims = self._stack_images(ims)[:, None, :, :].to(device) / 255
        classes = torch.tensor(classes).to(device)
        return ims, classes

In [9]:
# Wrap the training images/labels together with the augmenter; batches are
# assembled by the dataset's own collate_fn, which is where augmentation runs.
train = FMNISTDataset(x=tr_images, y=tr_targets, aug=aug)
train_dataloader = DataLoader(
    dataset=train,
    batch_size=64,
    shuffle=True,
    collate_fn=train.collate_fn,
)