In [None]:
%reload_ext autoreload
%autoreload 2

from nb_002 import *

In [None]:
DATA_PATH = Path('../data')
PATH = DATA_PATH/'imagenet'

Test of all the different possibilities for a pipeline on imagenet, including:
- resizing the image so that the lower dimension is 224
- random rotate -10 to 10 degrees
- random scale 0.9 to 1.1
- random flip
- random crop

Test on the first 100 batches of imagenet (with shuffle=False)

# Torchvision

In [None]:
import torchvision

In [None]:
class TVImageDataset(Dataset):
    """Image-folder dataset whose samples go through a torchvision transform chain.

    `folder` is scanned with `find_classes`; every class directory found is
    listed with `get_image_files`, and each file is labelled with the index of
    its class directory. `tfms` is a list of transforms that gets wrapped in
    `torchvision.transforms.Compose`.
    """
    def __init__(self, folder, tfms):
        cls_dirs = find_classes(folder)
        self.classes = [d.name for d in cls_dirs]
        self.fns, self.y = [], []
        for label, cls_dir in enumerate(cls_dirs):
            files = get_image_files(cls_dir)
            self.fns.extend(files)
            # One label per file, in the same order as `self.fns`.
            self.y.extend([label] * len(files))
        self.tfms = torchvision.transforms.Compose(tfms)

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        # Decode as RGB, then hand the PIL image to the composed transforms.
        img = Image.open(self.fns[i]).convert('RGB')
        return self.tfms(img), self.y[i]

In [None]:
class DeviceDataLoader():
    """Wrap a data loader so each batch is moved to `device` and normalized.

    `stats` is a pair `(mean, std)`; both are converted to float32 tensors on
    `device` and broadcast over the second dimension of the input batch.
    """
    def __init__(self, dl, device, stats):
        self.dl = dl
        self.device = device
        mean, std = stats
        self.m = torch.tensor(mean, dtype=torch.float32, device=device)
        self.s = torch.tensor(std, dtype=torch.float32, device=device)

    def __len__(self): return (len(self.dl))

    def __iter__(self):
        for batch in self.dl:
            xb = batch[0].to(self.device)
            yb = batch[1].to(self.device)
            # Index with None so the 1-D stats line up with dimension 1 of a 4-D batch.
            xb = (xb - self.m[None,:,None,None]) / self.s[None,:,None,None]
            yield xb,yb

In [None]:
def get_dataloader(ds, bs, shuffle, stats, device = None, sampler=None):
    """Build a `DataLoader` over `ds` and wrap it in a `DeviceDataLoader`.

    When `device` is None the module-level `default_device` is used (looked up
    at call time). The loader uses 8 worker processes and pinned memory.
    """
    target = default_device if device is None else device
    loader = DataLoader(ds, batch_size=bs, shuffle=shuffle, sampler=sampler,
                        num_workers=8, pin_memory=True)
    return DeviceDataLoader(loader, target, stats)

In [None]:
sz, bs = 224, 192
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [torchvision.transforms.RandomRotation(10),
              torchvision.transforms.RandomResizedCrop(sz, scale=(0.5, 1.0), ratio=(1.,1.)),
              torchvision.transforms.RandomHorizontalFlip(),
              torchvision.transforms.ToTensor()]

In [None]:
train_ds = TVImageDataset(PATH/'train', train_tfms)

In [None]:
# Device used by `get_dataloader` when no explicit `device` is passed.
default_device = torch.device('cuda', 0)

In [None]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [None]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

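37.3s (note: this run uses `bs=192`, i.e. 100 batches = 19,200 images, whereas the runs below use `bs=64`, i.e. 6,400 images, so the timings are not directly comparable)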

# Pipeline with grid_sampler

Needs the PR https://github.com/pytorch/pytorch/pull/9961/files to make grid_sample fast.

In [None]:
def dict_groupby(iterable, key=None):
    """Sort `iterable` by `key`, then map each distinct key value to the list of its items."""
    groups = {}
    # groupby only merges adjacent items, hence the sort with the same key first.
    for k, members in itertools.groupby(sorted(iterable, key=key), key=key):
        groups[k] = list(members)
    return groups

def resolve_pipeline(tfms, **kwargs):
    """Turn a list of transforms into a single callable applied to one image.

    Transforms are grouped by the value of their `return` annotation; the groups
    are then picked in the iteration order of `TfmType`. `kwargs` are forwarded
    to `compose_affine_tfms`. Returns `noop` when `tfms` is empty.
    """
    tfms = listify(tfms)
    if len(tfms)==0: return noop
    by_type = dict_groupby(tfms, lambda t: t.__annotations__['return'])
    # A missing group comes back as None from `.get`.
    lighting_tfms,coord_tfms,affine_tfms,pixel_tfms,final_tfms = (by_type.get(t) for t in TfmType)
    apply_lighting = apply_lighting_tfms(lighting_tfms)
    apply_affine = compose_affine_tfms(affine_tfms, funcs=coord_tfms, **kwargs)
    apply_pixel = compose_tfms(pixel_tfms)
    apply_final = compose_tfms(final_tfms)

    def _pipeline(x, **k):
        # Application order: pixel -> lighting -> affine (receives **k) -> final.
        return apply_final(apply_affine(apply_lighting(apply_pixel(x)), **k))
    return _pipeline

In [None]:
class TransformedImageDataset(Dataset):
    """Image-folder dataset that applies a `resolve_pipeline` transform chain to each image.

    `classes` defaults to the names returned by `find_classes(folder)`; files are
    listed per class with `get_image_files(folder/cls)` and labelled with the
    class index. `sz` is forwarded to the pipeline as `size`.
    """
    def __init__(self, folder, sz, tfms=None, classes=None):
        if classes is None: classes = [d.name for d in find_classes(folder)]
        self.classes = classes
        self.fns, self.y = [], []
        for label, name in enumerate(classes):
            files = get_image_files(folder/name)
            self.fns.extend(files)
            self.y.extend([label] * len(files))
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        img = pil2tensor(PIL.Image.open(self.fns[i]).convert('RGB'))
        if self.tfms is None: return img, self.y[i]
        # The pipeline is rebuilt for every item (presumably so that random
        # parameters are re-drawn per sample — TODO confirm against nb_002).
        pipeline = resolve_pipeline(self.tfms, size=self.sz)
        return pipeline(img), self.y[i]

In [None]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              zoom_tfm(scale=(0.9,1.1),p=0.75),
              rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [None]:
train_ds = TransformedImageDataset(PATH/'train', sz, train_tfms)

In [None]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [None]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

31.5s

Now without affine augmentation

In [None]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75),
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [None]:
train_ds = TransformedImageDataset(PATH/'train', sz, train_tfms)

In [None]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [None]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

28s

# With PIL

In [None]:
from PIL import Image

In [None]:
class TransformedImageDataset(Dataset):
    """Image-folder dataset that resizes, rotates and zooms with one PIL affine warp.

    The output of the warp has its shorter side equal to `sz` (aspect ratio kept).
    With probability 0.75 each, a rotation in [-10, 10] degrees and a zoom in
    [0.9, 1.1] are applied about the image centre. Remaining transforms in
    `tfms` (if any) are applied afterwards through `resolve_pipeline`.

    `classes` defaults to the names returned by `find_classes(folder)`; files are
    listed per class with `get_image_files(folder/cls)` and labelled with the
    class index.
    """
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        src_w,src_h = x.size
        # Target size: shorter side becomes `sz`, the other side keeps the aspect ratio.
        if src_w < src_h: w,h = self.sz,int(self.sz * src_h / src_w)
        else:             w,h = int(self.sz * src_w / src_h),self.sz
        theta = random.uniform(-10,10) * math.pi / 180 if random.random() < 0.75 else 0
        scale = random.uniform(0.9,1.1) if random.random() < 0.75 else 1
        # PIL's AFFINE data (a, b, c, d, e, f) maps every *output* pixel (x, y) to the
        # *input* position (a*x + b*y + c, d*x + e*y + f). The matrix therefore has to
        # carry the resize factor (input size / output size), and the offsets c and f
        # must map the output centre onto the input centre. Otherwise the result is a
        # top-left crop rotated about the corner, not a resized image.
        rx,ry = src_w / w, src_h / h
        cos_t,sin_t = math.cos(theta),math.sin(theta)
        # Dividing by `scale` zooms in for scale > 1 (a smaller input region is sampled).
        a,b = rx * cos_t / scale, -rx * sin_t / scale
        d,e = ry * sin_t / scale,  ry * cos_t / scale
        c = src_w / 2 - a * w / 2 - b * h / 2
        f = src_h / 2 - d * w / 2 - e * h / 2
        x = x.transform((w,h), Image.AFFINE, (a, b, c, d, e, f), Image.BILINEAR)
        x = pil2tensor(x)
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x)
        return x,self.y[i]

In [None]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75), Those are done in the dataset
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [None]:
train_ds = TransformedImageDataset(PATH/'train', sz, train_tfms)

In [None]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [None]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

15.8s ^^

Just PIL resize

In [None]:
class TransformedImageDataset(Dataset):
    """Image-folder dataset that resizes with PIL, then squashes to `sz` x `sz` with `F.interpolate`.

    The PIL resize brings the shorter side to `sz` while keeping the aspect
    ratio; `F.interpolate` then produces a square `(sz, sz)` tensor. Transforms
    in `tfms` (if any) are applied afterwards through `resolve_pipeline`.

    `classes` defaults to the names returned by `find_classes(folder)`; files are
    listed per class with `get_image_files(folder/cls)` and labelled with the
    class index.
    """
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        w,h = x.size
        if w < h: w,h = self.sz,int(self.sz * h / w)
        else:     w,h = int(self.sz * w / h),self.sz
        x = x.resize((w,h))
        x = pil2tensor(x)
        # F.interpolate needs a leading batch dimension. Add it for the call and strip it
        # straight away so the sample has the same rank whether or not `tfms` is set
        # (previously the batch dimension leaked out when `tfms` was None).
        # align_corners=False is the documented default behaviour, spelled out explicitly.
        x = F.interpolate(x[None], size=(self.sz,self.sz), mode='bilinear', align_corners=False)[0]
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x)
        return x,self.y[i]

In [None]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75),
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [None]:
train_ds = TransformedImageDataset(PATH/'train', sz, train_tfms)

In [None]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [None]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

11.6s

# Just F.interpolate

In [None]:
class TransformedImageDataset(Dataset):
    """Image-folder dataset that resizes every image to `sz` x `sz` with `F.interpolate` only.

    Transforms in `tfms` (if any) are applied afterwards through
    `resolve_pipeline`.

    `classes` defaults to the names returned by `find_classes(folder)`; files are
    listed per class with `get_image_files(folder/cls)` and labelled with the
    class index.
    """
    def __init__(self, folder, sz, tfms=None, classes=None):
        self.fns, self.y = [], []
        if classes is None: classes = [cls.name for cls in find_classes(folder)]
        self.classes = classes
        for i, cls in enumerate(classes):
            fnames = get_image_files(folder/cls)
            self.fns += fnames
            self.y += [i] * len(fnames)
        self.sz, self.tfms = sz, tfms

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        x = pil2tensor(x)
        # F.interpolate needs a leading batch dimension. Add it for the call and strip it
        # straight away so the sample has the same rank whether or not `tfms` is set
        # (previously the batch dimension leaked out when `tfms` was None).
        # align_corners=False is the documented default behaviour, spelled out explicitly.
        x = F.interpolate(x[None], size=(self.sz,self.sz), mode='bilinear', align_corners=False)[0]
        if self.tfms is not None:
            x = resolve_pipeline(self.tfms)(x)
        return x,self.y[i]

In [None]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              #zoom_tfm(scale=(0.9,1.1),p=0.75),
              #rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [None]:
train_ds = TransformedImageDataset(PATH/'train', sz, train_tfms)

In [None]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [None]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

13.3s

# On the GPU

`grid_sample` used to be faster on the GPU, so let's try to do that step there by moving the image to the GPU for the affine transformation.

In [None]:
def do_affine(img, m=None, func=None, size=None, **kwargs):
    """Apply an affine matrix `m` (and optional coordinate function `func`) to `img` on the GPU.

    `img` and `m` are moved to CUDA before use. `size` may be None (keep the
    size of `img`), an int (the smaller of dimensions 1 and 2 becomes `size`, the
    other is scaled proportionally) or an explicit size passed to `affine_grid`.
    When `m` is None and the size is unchanged, the (CUDA) image is returned as
    is. Extra `kwargs` are forwarded to `grid_sample`.

    NOTE(review): this only rebinds `do_affine` in the notebook namespace;
    whether `compose_affine_tfms` (imported from nb_002) picks it up is not
    visible here — confirm before trusting the GPU timing.
    NOTE(review): if this runs inside DataLoader worker processes
    (`num_workers=8` in `get_dataloader`), CUDA use in forked workers may
    fail — TODO confirm.
    """
    img = img.cuda(non_blocking=True)
    if size is None:
        size = img.size()
    elif isinstance(size, int):
        # assumes img is (channels, height, width) — TODO confirm against pil2tensor
        c, h, w = img.size(0), img.size(1), img.size(2)
        if h < w: size = (c, size, int(w*size/h))
        else:     size = (c, int(h*size/w), size)
    if m is None:
        # Nothing to do: no matrix and no resize requested.
        if img.shape==size: return img
        m = eye_new(img, 3)
    m = m.cuda(non_blocking=True)
    coords = affine_grid(img,  img.new_tensor(m), size=size)
    if func is not None: coords = func(coords)
    return grid_sample(img, coords, **kwargs)

In [None]:
def dict_groupby(iterable, key=None):
    """Group the items of `iterable` by `key` into a dict of lists (items are sorted by `key` first)."""
    # groupby only merges adjacent items, so the input is sorted with the same key.
    ordered = sorted(iterable, key=key)
    return dict((k, list(run)) for k, run in itertools.groupby(ordered, key=key))

def resolve_pipeline(tfms, **kwargs):
    """Compose `tfms` into one callable; `kwargs` go to `compose_affine_tfms`.

    Transforms are bucketed by their `return` annotation and the buckets are
    read out in the iteration order of `TfmType`. An empty `tfms` yields `noop`.
    """
    tfms = listify(tfms)
    if len(tfms)==0: return noop
    buckets = dict_groupby(tfms, lambda tfm: tfm.__annotations__['return'])
    # `.get` returns None for a bucket with no transforms.
    lighting_tfms,coord_tfms,affine_tfms,pixel_tfms,final_tfms = [buckets.get(kind) for kind in TfmType]
    lighting_step = apply_lighting_tfms(lighting_tfms)
    affine_step = compose_affine_tfms(affine_tfms, funcs=coord_tfms, **kwargs)
    pixel_step = compose_tfms(pixel_tfms)
    final_step = compose_tfms(final_tfms)

    def _run(x, **k):
        # pixel -> lighting -> affine (receives **k) -> final
        x = pixel_step(x)
        x = lighting_step(x)
        x = affine_step(x, **k)
        return final_step(x)
    return _run

In [None]:
class TransformedImageDataset(Dataset):
    """Image-folder dataset whose images go through `resolve_pipeline(tfms, size=sz)`.

    `classes` defaults to the names returned by `find_classes(folder)`; files are
    listed per class with `get_image_files(folder/cls)` and labelled with the
    class index.
    """
    def __init__(self, folder, sz, tfms=None, classes=None):
        if classes is None: classes = [d.name for d in find_classes(folder)]
        self.classes = classes
        self.sz, self.tfms = sz, tfms
        self.fns, self.y = [], []
        for label, name in enumerate(classes):
            files = get_image_files(folder/name)
            self.fns.extend(files)
            self.y.extend([label] * len(files))

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        img = PIL.Image.open(self.fns[i]).convert('RGB')
        tensor = pil2tensor(img)
        if self.tfms is not None:
            # Resolved per item rather than once in __init__.
            tensor = resolve_pipeline(self.tfms, size=self.sz)(tensor)
        return tensor,self.y[i]

In [None]:
sz, bs = 224, 64
stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
train_tfms = [flip_lr_tfm(p=0.5),
              zoom_tfm(scale=(0.9,1.1),p=0.75),
              rotate_tfm(degrees=(-10,10.),p=0.75),
              crop_tfm(size=sz)]

In [None]:
class DeviceDataLoader():
    """Wrap a data loader so each batch ends up on `device` and is normalized.

    `stats` is a pair `(mean, std)`; both are converted to float32 tensors on
    `device` and broadcast over the second dimension of the input batch.
    """
    def __init__(self, dl, device, stats):
        self.dl,self.device = dl,device
        self.m, self.s = map(lambda x:torch.tensor(x, dtype=torch.float32, device=device), stats)

    def __iter__(self):
        for b in self.dl:
            # The batch must be unpacked here: with this line commented out, `x` and `y`
            # were undefined and iteration raised UnboundLocalError. `.to` returns the
            # tensor itself when it is already on the target device, so keeping the
            # call is harmless even if the inputs were produced on the GPU.
            x, y = b[0].to(self.device),b[1].to(self.device)
            x = (x - self.m[None,:,None,None]) / self.s[None,:,None,None]
            yield x,y

    def __len__(self): return (len(self.dl))

In [None]:
def get_dataloader(ds, bs, shuffle, stats, device = None, sampler=None):
    """Create a `DataLoader` for `ds` (8 workers, pinned memory) wrapped in a `DeviceDataLoader`.

    `device` falls back to the module-level `default_device` when it is None.
    """
    if device is None:
        device = default_device
    loader_kwargs = dict(batch_size=bs, shuffle=shuffle, num_workers=8,
                         sampler=sampler, pin_memory=True)
    return DeviceDataLoader(DataLoader(ds, **loader_kwargs), device, stats)

In [None]:
class DeviceDataLoader():
    """Iterate over `dl`, moving each batch to `device` and normalizing the inputs.

    `stats` is a pair `(mean, std)`, stored as float32 tensors on `device`.
    """
    def __init__(self, dl, device, stats):
        self.dl = dl
        self.device = device
        mean, std = stats
        self.m = torch.tensor(mean, dtype=torch.float32, device=device)
        self.s = torch.tensor(std, dtype=torch.float32, device=device)

    def __len__(self): return (len(self.dl))

    def __iter__(self):
        for batch in self.dl:
            inputs = batch[0].to(self.device)
            targets = batch[1].to(self.device)
            # None-indexing lines the 1-D stats up with dimension 1 of a 4-D batch.
            inputs = (inputs - self.m[None,:,None,None]) / self.s[None,:,None,None]
            yield inputs,targets

def get_dataloader(ds, bs, shuffle, stats, device = None, sampler=None):
    """Wrap `ds` in a `DataLoader` (8 workers, pinned memory) and then in a `DeviceDataLoader`.

    Uses the module-level `default_device` when `device` is None.
    """
    target = default_device if device is None else device
    loader = DataLoader(ds, batch_size=bs, shuffle=shuffle, sampler=sampler,
                        num_workers=8, pin_memory=True)
    return DeviceDataLoader(loader, target, stats)

In [None]:
train_ds = TransformedImageDataset(PATH/'train', sz, train_tfms)

In [None]:
train_dl = get_dataloader(train_ds, bs, shuffle=False, stats=stats)

In [None]:
train_iter = iter(train_dl)
%time for i in tqdm(range(100)): x,y = next(train_iter)

No significant change from the CPU.