## Old fastai

In [None]:
# Notebook setup: draw matplotlib figures inline and (re)load the autoreload
# extension in mode 2 so edits to imported modules are picked up.
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [None]:
from fastai.conv_learner import *
from fastai.models.cifar10.wideresnet import wrn_22
# Turn on cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True
# Root folder of the CIFAR-10 data; created here if it does not exist yet.
PATH = Path("../data/cifar10/")
os.makedirs(PATH,exist_ok=True)

In [None]:
# The ten CIFAR-10 class names.
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Two per-channel arrays; DeviceDataLoader (defined further down) unpacks them
# as (mean, std) and normalizes each batch as (x - mean) / std.
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

bs=128  # batch size passed to every data object below
sz=32   # NOTE(review): never referenced in the visible cells, which hard-code 32 — presumably the image size

In [None]:
# Old-fastai data pipeline: size-32 transforms built from `stats`, with pad=4
# and a random 32px crop plus a random flip as augmentation.
tfms = tfms_from_stats(stats, 32, aug_tfms=[RandomCrop(32), RandomFlip()], pad=4)
# The 'test' folder is used as the validation set.
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [None]:
# Baseline learner: wrn_22 model, Adam with betas (0.95, 0.99),
# cross-entropy loss and accuracy as the reported metric.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1  # passed to learn.fit as `wds` in the next cell

In [None]:
# Time the baseline training run (old fastai data pipeline).
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

Standard DawnBench result with one GPU: 94% accuracy in 22min47s.

## New pipeline + openCV (like in old fastai)

In [None]:
# Device used by DataBunch when no `device` argument is given: CUDA device 0.
default_device = torch.device('cuda', 0)

In [None]:
def find_classes(folder):
    "Return the sub-directories of `folder` whose name does not start with '.', ordered by name."
    visible_dirs = []
    for entry in folder.iterdir():
        # Skip plain files and hidden directories.
        if not entry.is_dir() or entry.name.startswith('.'): continue
        visible_dirs.append(entry)
    return sorted(visible_dirs, key=lambda entry: entry.name)

def get_image_files(c):
    "List the entries directly inside `c` that are neither directories nor named with a leading '.'."
    files = []
    for entry in c.iterdir():
        if entry.name.startswith('.'): continue
        if entry.is_dir(): continue
        files.append(entry)
    return files

In [None]:
from PIL import Image
from torch.utils.data import Dataset as Dataset1

class ImageDataset1(Dataset1):#Renamed to avoid conflict with fastai ImageDataset
    """Dataset over a folder that contains one sub-directory per class.

    The class directories come from `find_classes` and their files from
    `get_image_files`; every file is labelled with the index of its class
    directory. `tfms` is an iterable of callables applied in order, each
    called as `tfm(x, None)` and returning a pair whose first item is the
    transformed image.
    """
    def __init__(self, folder, tfms):
        cls_dirs = find_classes(folder)
        self.classes = [cls_dir.name for cls_dir in cls_dirs]
        self.fns, self.y = [], []
        for label, cls_dir in enumerate(cls_dirs):
            files = get_image_files(cls_dir)
            self.fns.extend(files)
            self.y.extend([label] * len(files))
        self.tfms = tfms

    def __len__(self):
        return len(self.fns)

    def __getitem__(self, i):
        img = open_image(self.fns[i])
        for tfm in self.tfms:
            # Each transform returns (image, target); only the image is kept.
            img, _ = tfm(img, None)
        return img, self.y[i]

In [None]:
class DeviceDataLoader():
    """Wrap a data loader so that batches are moved to `device` and normalized.

    `stats` is a pair (mean, std); both are stored as float32 tensors on
    `device`. While iterating, the first two items of each batch are moved to
    `device` and the first one is normalized as (x - mean) / std, with mean and
    std broadcast along dimension 1.
    """
    def __init__(self, dl, device, stats):
        self.dl = dl
        self.device = device
        mean, std = stats
        self.m = torch.tensor(mean, dtype=torch.float32, device=device)
        self.s = torch.tensor(std, dtype=torch.float32, device=device)

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        for batch in self.dl:
            xb = batch[0].to(self.device)
            yb = batch[1].to(self.device)
            # Reshape the 1-D stats to (1, C, 1, 1) so they broadcast over the batch.
            mean = self.m[None, :, None, None]
            std = self.s[None, :, None, None]
            yield (xb - mean) / std, yb

Let's import the `DataLoader` directly from PyTorch, since fastai replaced the name with its own definition.

In [None]:
from torch.utils.data.dataloader import DataLoader as DataLoader1
def get_dataloader(ds, bs, shuffle, device, stats, num_workers=8):
    """Build a PyTorch DataLoader over `ds` and wrap it in a DeviceDataLoader.

    Args:
        ds: dataset to load from.
        bs: batch size.
        shuffle: whether the DataLoader shuffles the dataset.
        device: device the batches are moved to.
        stats: (mean, std) pair used by DeviceDataLoader to normalize batches.
        num_workers: number of worker processes given to the DataLoader.
            Defaults to 8, the value that used to be hard-coded.

    Returns:
        A DeviceDataLoader yielding normalized batches on `device`.
    """
    loader = DataLoader1(ds, batch_size=bs, shuffle=shuffle, num_workers=num_workers)
    return DeviceDataLoader(loader, device, stats)

In [None]:
class DataBunch():
    """Hold a training and a validation loader built from two datasets.

    The training loader shuffles and uses batch size `bs`; the validation
    loader does not shuffle and uses `bs*2`. When `device` is None the
    module-level `default_device` is used. If the training dataset has a
    `classes` attribute it is copied onto the bunch.
    """
    def __init__(self, trn_ds, val_ds, stats, bs=64, device=None):
        if device is None: device = default_device
        self.device = device
        if hasattr(trn_ds, 'classes'): self.classes = trn_ds.classes
        shared = dict(device=self.device, stats=stats)
        self.trn_dl = get_dataloader(trn_ds, bs,   shuffle=True,  **shared)
        self.val_dl = get_dataloader(val_ds, bs*2, shuffle=False, **shared)

    @classmethod
    def from_files(cls, Path, trn_tfms, val_tfms, stats, trn_name='train', val_name='valid', bs=64, device=None):
        "Create a DataBunch from the `trn_name` and `val_name` sub-folders of `Path`, each wrapped in an ImageDataset1."
        trn_ds = ImageDataset1(Path/trn_name, trn_tfms)
        val_ds = ImageDataset1(Path/val_name, val_tfms)
        return cls(trn_ds, val_ds, stats, bs, device)

In [None]:
# Build the fastai transforms as in the baseline, then remove the
# second-to-last transform from both the training and validation lists.
# NOTE(review): presumably that entry is the normalization step, which
# DeviceDataLoader now performs on the device — confirm against fastai.
tfms = tfms_from_stats(stats, 32, aug_tfms=[RandomCrop(32), RandomFlip()], pad=4)
tfms[0].tfms.pop(-2)
tfms[1].tfms.pop(-2)
# New-pipeline loaders: tfms[0] for training, tfms[1] for validation ('test' folder).
data = DataBunch.from_files(PATH, tfms[0].tfms, tfms[1].tfms, stats, bs=bs, val_name='test')

In [None]:
# fastai data object used only to construct the learner in the next cell;
# its training and validation loaders are replaced there by the ones from `data`.
tfms1 = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms1, bs=bs, val_name='test')

In [None]:
# Same model, optimizer, loss and metric as the baseline.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1
# Swap the learner's loaders for the new pipeline's loaders.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [None]:
# Time the same fit call as the baseline, now fed by the new loaders.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

The new dataloader in PyTorch is fast! It only takes 13min47s to reach 94% accuracy!

## New pipeline + torchvision

In [None]:
from PIL import Image
from torch.utils.data import Dataset as Dataset1

class ImageDataset1(Dataset1):#Renamed to avoid conflict with fastai ImageDataset
    """Folder-per-class dataset that opens images with PIL and applies torchvision transforms.

    Args:
        folder: directory containing one sub-directory per class.
        tfms: list of torchvision transforms, composed in order. An empty list
            or None means that no transform is applied.

    Items are returned as (image, label), where the image is a float32 numpy
    array transposed with (2,0,1) and divided by 255, and the label is the
    index of the class directory.
    """
    def __init__(self, folder, tfms):
        cls_dirs = find_classes(folder)
        self.fns, self.y = [], []
        self.classes = [cls.name for cls in cls_dirs]
        for i, cls_dir in enumerate(cls_dirs):
            fnames = get_image_files(cls_dir)
            self.fns += fnames
            self.y += [i] * len(fnames)
        # Truthiness check: both [] and None disable the transforms. The previous
        # `tfms != []` test built Compose(None) for None, which only failed later,
        # when the first item was fetched.
        self.tfms = torchvision.transforms.Compose(tfms) if tfms else None

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        x = Image.open(self.fns[i])
        if self.tfms is not None: x = self.tfms(x)
        # Move the last axis first and scale the values by 1/255.
        return np.array(x, dtype=np.float32).transpose(2,0,1)/255,self.y[i]

In [None]:
# torchvision augmentation for training: symmetric padding of 4,
# random 32px crop, random horizontal flip.
trn_tfms = [torchvision.transforms.Pad(4, padding_mode='symmetric'),
            torchvision.transforms.RandomCrop(32),
            torchvision.transforms.RandomHorizontalFlip()]
# No transforms for validation.
val_tfms = []
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [None]:
# fastai data object used only to construct the learner in the next cell;
# its training and validation loaders are replaced there by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [None]:
# Same model, optimizer, loss and metric as the baseline.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1
# Swap the learner's loaders for the torchvision-based loaders.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [None]:
# Time the same fit call as the baseline, fed by the torchvision-based loaders.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

Just a tiny bit slower but nothing remarkable.

## New pipeline + data aug on tensors

Here we do all the data aug on the torch tensors.

In [None]:
from PIL import Image
from torch.utils.data import Dataset as Dataset1

class ImageDataset1(Dataset1):#Renamed to avoid conflict with fastai ImageDataset
    """Folder-per-class dataset whose augmentation runs on torch tensors.

    Each image is opened with PIL, converted to a float32 array, transposed
    with (2,0,1), divided by 255 and turned into a tensor. If `tfms` is given
    it is called on that tensor and the first element of its result is used.
    Labels are the index of the class directory.
    """
    def __init__(self, folder, tfms=None):
        cls_dirs = find_classes(folder)
        self.classes = [cls_dir.name for cls_dir in cls_dirs]
        self.fns, self.y = [], []
        for label, cls_dir in enumerate(cls_dirs):
            files = get_image_files(cls_dir)
            self.fns.extend(files)
            self.y.extend([label] * len(files))
        self.tfms = tfms

    def __len__(self):
        return len(self.fns)

    def __getitem__(self, i):
        pixels = np.array(Image.open(self.fns[i]), dtype=np.float32)
        x = torch.tensor(pixels.transpose(2,0,1)/255)
        if self.tfms is not None:
            # The transform returns an indexable result; keep its first element.
            x = self.tfms(x)[0]
        return x, self.y[i]

In [None]:
class CustomTfm():
    """Reflect-pad, randomly crop and randomly flip one image tensor.

    `p_flip` is the flip probability handed to `do_random_flip`, `pad` the
    padding added on each side of the last two dimensions, and `size` the side
    of the square crop. The result keeps the leading batch dimension added
    before padding.
    """
    def __init__(self, p_flip, pad, size):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        # 'reflect' is used because F.pad has no symmetric mode.
        x = F.pad(x[None], (p, p, p, p), 'reflect')
        padded_h, padded_w = h + 2*p, w + 2*p
        # Random top-left corner; falls back to 0 when the padded image is smaller than the crop.
        top  = random.randint(0, padded_h - size) if padded_h >= size else 0
        left = random.randint(0, padded_w - size) if padded_w >= size else 0
        crop = x[:, :, top:top+size, left:left+size]
        return do_random_flip(crop, self.p_flip)

In [None]:
def do_random_flip(x, prob):
    """Reverse `x` along dimension 3 with probability `prob`.

    Args:
        x: tensor with at least four dimensions; dimension 3 is reversed.
        prob: probability of flipping. 0 never flips, 1 always flips.

    Returns:
        `x` itself when no flip is drawn, otherwise a new tensor with
        dimension 3 reversed.
    """
    # Draw from the `random` module, like the crop offsets in CustomTfm, rather
    # than from numpy's global generator.
    if random.random() >= prob: return x
    # Build the reversed index on x's own device so index_select also works
    # for tensors that do not live on the CPU.
    idx = torch.arange(x.size(3) - 1, -1, -1, dtype=torch.long, device=x.device)
    return x.index_select(3, idx)

In [None]:
# Training transform: p_flip=0.5, pad=4, size=32 (CustomTfm's positional arguments).
trn_tfms = CustomTfm(0.5, 4, 32)
val_tfms = None  # ImageDataset1 applies no transform when tfms is None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [None]:
# fastai data object used only to construct the learner in the next cell;
# its training and validation loaders are replaced there by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [None]:
# Same model, optimizer, loss and metric as the baseline.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1
# Swap the learner's loaders for the tensor-augmentation loaders.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [None]:
# Time the same fit call as the baseline, with augmentation done on tensors.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

A bit faster than OpenCV, and using reflect padding instead of symmetric doesn't seem to hurt.

## Same but with an interpolation

In [None]:
def interpolate(x, coords, padding='reflect'):
    """Sample `x` at `coords` with F.grid_sample.

    Args:
        x: input tensor passed to F.grid_sample.
        coords: sampling grid passed to F.grid_sample. It is not modified.
        padding: padding mode. 'reflect' is emulated here (see below); any
            other value is forwarded unchanged as `padding_mode`.

    Returns:
        The result of F.grid_sample.
    """
    if padding=='reflect':#Reflect padding isn't implemented in grid_sample yet
        # Mirror coordinates below -1 around -1, then coordinates above 1
        # around 1, and sample the result with zero padding. torch.where builds
        # new tensors, so the caller's grid is left untouched. The second test
        # runs on the already-updated values, matching the original order.
        coords = torch.where(coords < -1, -coords - 2, coords)
        coords = torch.where(coords > 1, 2 - coords, coords)
        padding='zeros'
    return F.grid_sample(x, coords, padding_mode=padding)

In [None]:
class CustomTfm():
    """Reflect-pad one image tensor, crop it through a sampling grid, then randomly flip it.

    An identity affine grid is built for the padded image, a random
    `size` x `size` window of that grid is selected, and the padded image is
    sampled at those coordinates with `interpolate`. The flip is applied
    afterwards by `do_random_flip` with probability `p_flip`.
    """
    def __init__(self, p_flip, pad, size):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        # 'reflect' is used because F.pad has no symmetric mode.
        x = F.pad(x[None], (p, p, p, p), 'reflect')
        padded_h, padded_w = h + 2*p, w + 2*p
        # First two rows of the 3x3 identity: the 2x3 matrix of the identity transform.
        theta = torch.eye(3)[:2, :]
        grid = F.affine_grid(theta[None], torch.Size([1, 3, padded_h, padded_w]))
        # Random top-left corner of the crop window, or 0 when the image is too small.
        top  = random.randint(0, padded_h - size) if padded_h >= size else 0
        left = random.randint(0, padded_w - size) if padded_w >= size else 0
        window = grid[:, top:top+size, left:left+size, :]
        return do_random_flip(interpolate(x, window), self.p_flip)

In [None]:
# Training transform: p_flip=0.5, pad=4, size=32 (CustomTfm's positional arguments).
trn_tfms = CustomTfm(0.5, 4, 32)
val_tfms = None  # ImageDataset1 applies no transform when tfms is None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [None]:
# fastai data object used only to construct the learner in the next cell;
# its training and validation loaders are replaced there by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [None]:
# Same model, optimizer, loss and metric as the baseline.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1
# Swap the learner's loaders for the interpolation-based loaders.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [None]:
# Time the same fit call as the baseline, with the crop done by interpolation.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

We don't lose time and it's still as accurate.

## Same with random flip as an affine transform

In [None]:
def affine_transform(img, matrix, interpol=True, padding='reflect'):
    """
    Applies an affine transformation to a single image.

    A leading batch dimension is added to both `img` and `matrix` before the
    sampling grid is computed.

    Args:
    img: the image to transform (without batch dimension)
    matrix: a matrix of size 2 by 3 describing the transformation.
            if the transformation is Ax + b, the matrix is (A|b)
    interpol: if False, only the grid of new coordinates is returned and no
              interpolation is done
    padding: padding passed to `interpolate` when interpol is True

    Returns:
    the interpolated image batch, or the coordinate grid when interpol is False
    """
    batch = img[None]
    grid = F.affine_grid(matrix[None], batch.size())
    if not interpol: return grid
    return interpolate(batch, grid, padding)

In [None]:
def get_random_rot_matrix(degrees):
    "Return a 3x3 rotation matrix for an angle drawn uniformly from [-degrees, degrees] (in degrees)."
    angle_deg = random.uniform(-degrees, degrees)
    theta = angle_deg * math.pi / 180
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    rows = [[cos_t, -sin_t, 0],
            [sin_t,  cos_t, 0],
            [0,      0,     1]]
    return torch.tensor(rows)

In [None]:
def get_random_scale_matrix(zoom_range):
    "Return a 3x3 matrix scaling both axes by one factor drawn uniformly from `zoom_range`."
    scale = random.uniform(*zoom_range)
    matrix = torch.eye(3)
    # Same factor on both axes; the last diagonal entry stays 1.
    matrix[0, 0] = scale
    matrix[1, 1] = scale
    return matrix

In [None]:
def get_random_flip(prob):
    """Return a 3x3 matrix that negates the first coordinate with probability `prob`.

    Args:
        prob: probability of returning the flip matrix. 0 never flips,
            1 always flips.

    Returns:
        A float32 3x3 tensor: diag(-1, 1, 1) when a flip is drawn, the
        identity otherwise.
    """
    # Draw from the `random` module, like the crop offsets in CustomTfm, rather
    # than from numpy's global generator.
    if random.random() >= prob: return torch.eye(3)
    return torch.tensor([[-1, 0, 0],
                         [0,  1, 0],
                         [0,  0, 1]]).float()

In [None]:
class CustomTfm():
    """Reflect-pad one image tensor, then flip and crop it through a single sampling grid.

    The random flip is expressed as the affine matrix returned by
    `get_random_flip`; the grid built from it is cropped to a random
    `size` x `size` window and the padded image is sampled there with
    `interpolate`.
    """
    def __init__(self, p_flip, pad, size):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        # 'reflect' is used because F.pad has no symmetric mode.
        x = F.pad(x[None], (p, p, p, p), 'reflect')
        padded_h, padded_w = h + 2*p, w + 2*p
        # Keep the first two rows of the 3x3 flip matrix: the 2x3 affine matrix.
        theta = get_random_flip(self.p_flip)[:2, :]
        grid = F.affine_grid(theta[None], torch.Size([1, 3, padded_h, padded_w]))
        # Random top-left corner of the crop window, or 0 when the image is too small.
        top  = random.randint(0, padded_h - size) if padded_h >= size else 0
        left = random.randint(0, padded_w - size) if padded_w >= size else 0
        window = grid[:, top:top+size, left:left+size, :]
        return interpolate(x, window)

In [None]:
# Training transform: p_flip=0.5, pad=4, size=32 (CustomTfm's positional arguments).
trn_tfms = CustomTfm(0.5, 4, 32)
val_tfms = None  # ImageDataset1 applies no transform when tfms is None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [None]:
# fastai data object used only to construct the learner in the next cell;
# its training and validation loaders are replaced there by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [None]:
# Same model, optimizer, loss and metric as the baseline.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1
# Swap the learner's loaders for the affine-flip loaders.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [None]:
# Time the same fit call as the baseline, with the flip done as an affine transform.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)

Still seems fine.

## Final pipeline

In [None]:
class CustomTfm():
    """Pad, rescale, flip and crop one image tensor through a single sampling grid.

    `size` is the side of the square crop and `size_mult` a multiplier: the
    sampling grid is sized as if the shorter image side were
    `size * size_mult`. The image itself is only padded, at its original
    resolution; the rescaling comes from sampling it on that grid.
    `p_flip` and `pad` are the flip probability and the padding per side.
    """
    def __init__(self, p_flip, pad, size, size_mult):
        self.p_flip = p_flip
        self.pad = pad
        self.size = size
        self.size_mult = size_mult

    def __call__(self, x):
        _, h, w = x.size()
        p, size = self.pad, self.size
        #Target dimensions: the shorter side becomes size * size_mult
        ratio = (size * self.size_mult) / min(h,w)
        h, w = int(h * ratio), int(w*ratio)
        #Pad the original image; 'reflect' because F.pad has no symmetric mode
        x = F.pad(x[None], (p, p, p, p), 'reflect')
        #Affine grid for the random flip, built at the target dimensions plus padding
        theta = get_random_flip(self.p_flip)[:2, :]
        grid_h, grid_w = h + 2*p, w + 2*p
        grid = F.affine_grid(theta[None], torch.Size([1, 3, grid_h, grid_w]))
        #Random crop window on the grid, or corner 0 when the grid is too small
        top  = random.randint(0, grid_h - size) if grid_h >= size else 0
        left = random.randint(0, grid_w - size) if grid_w >= size else 0
        window = grid[:, top:top+size, left:left+size, :]
        #Sample the padded image at the selected coordinates
        return interpolate(x, window)

In [None]:
# Training transform: p_flip=0.5, pad=4, size=32, size_mult=1 (CustomTfm's positional arguments).
trn_tfms = CustomTfm(0.5, 4, 32, 1)
val_tfms = None  # ImageDataset1 applies no transform when tfms is None
data = DataBunch.from_files(PATH, trn_tfms, val_tfms, stats, bs=bs, val_name='test')

In [None]:
# fastai data object used only to construct the learner in the next cell;
# its training and validation loaders are replaced there by the ones from `data`.
tfms = tfms_from_stats(stats, 28, aug_tfms=[RandomCrop(28)], pad=0)
data1 = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, val_name='test')

In [None]:
# Same model, optimizer, loss and metric as the baseline.
m = wrn_22()
opt_fn = partial(optim.Adam, betas=(0.95,0.99))
learn = ConvLearner.from_model_data(m, data1, opt_fn=opt_fn)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=0.1
# Swap the learner's loaders for the final pipeline's loaders.
learn.data.trn_dl, learn.data.val_dl = data.trn_dl, data.val_dl

In [None]:
# Time the same fit call as the baseline, fed by the final pipeline.
%time learn.fit(3e-3, 1, cycle_len=30, use_clr_beta=(10,7.5,0.95,0.85), wds=wd, use_wd_sched=True)