In [None]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [None]:
from nb_004 import *

In [None]:
# Root directory for datasets, relative to the notebook's working directory.
DATA_PATH = Path('../data')
# Folder handed to `ImageDataset.from_folder` further down.
PATH = DATA_PATH/'caltech101'

In [None]:
# Three-element mean and standard deviation passed to `normalize_tfm` in the transform lists below.
# NOTE(review): presumably per-channel RGB statistics measured on this dataset — confirm.
data_mean,data_std = map(tensor, ([0.5355,0.5430,0.5280], [0.2909,0.2788,0.2979]))

In [None]:
#export
class ImageDataset(Dataset):
    """Dataset of image files paired with integer class indices.

    Attributes:
        classes: ordered collection of class labels; position == class index.
        class2idx: mapping from class label to its index in `classes`.
        fns: numpy array of image file names.
        y: list with the class index of every entry of `fns`.
    """
    def __init__(self, fns, labels, classes=None):
        """Store `fns` and encode `labels` as indices into `classes`.

        When `classes` is None it is derived from `labels` in order of first
        appearance, so the label -> index mapping is reproducible (iterating a
        plain `set` gives an arbitrary order).
        Raises KeyError if a label is not present in `classes`.
        """
        if classes is None: classes = list(dict.fromkeys(labels))
        self.classes = classes
        self.class2idx = {v:k for k,v in enumerate(classes)}
        self.fns = np.array(fns)
        self.y = [self.class2idx[o] for o in labels]

    @classmethod
    def from_folder(cls, folder, classes=None, test_pct=0., tfms=None):
        """Build dataset(s) from `folder`, which holds one sub-folder per class.

        Args:
            folder: path-like supporting `/`; each class lives in `folder/<class>`.
            classes: class names to load; defaults to the names returned by
                `find_classes(folder)`.
            test_pct: probability with which each file is sent to the held-out split.
            tfms: accepted for interface compatibility; not used here.

        Returns:
            A single dataset when `test_pct == 0`, otherwise a
            `(train, held_out)` pair drawn with numpy's global RNG.
        """
        # Use a distinct loop name so the comprehension does not shadow `cls`.
        if classes is None: classes = [c.name for c in find_classes(folder)]

        fns,labels = [],[]
        for cl in classes:
            fnames = get_image_files(folder/cl)
            fns += fnames
            labels += [cl] * len(fnames)

        # Always forward `classes`: otherwise every split would rebuild its own
        # class list and the same label could map to different indices.
        if test_pct==0.: return cls(fns, labels, classes=classes)
        fns,labels = np.array(fns),np.array(labels)
        is_test = np.random.uniform(size=(len(fns),)) < test_pct
        return (cls(fns[~is_test], labels[~is_test], classes=classes),
                cls(fns[is_test], labels[is_test], classes=classes))

    def __len__(self): return len(self.fns)

    def __getitem__(self,i):
        """Open image `i`, convert it to RGB and return `(pil2tensor(image), class index)`."""
        x = PIL.Image.open(self.fns[i]).convert('RGB')
        x = pil2tensor(x)
        return x,self.y[i]

In [None]:
@reg_transform
def crop_with_ratio(x, scale:uniform, ratio:uniform, invert:rand_bool, row_pct:uniform, col_pct:uniform) -> TfmType.Start:
    """Crop `x` to a random area/aspect ratio, falling back to a centred square.

    `scale`, `ratio` and `invert` are iterated together: one entry per attempt.
    The first attempt whose crop fits inside `x` is returned; `row_pct` and
    `col_pct` position that crop. `x` is indexed as (channel, row, column).
    """
    height, width = x.size(1), x.size(2)
    area = height * width
    for s,r,swap in zip(scale, ratio, invert):
        target_area = area * s
        cols = int(round(math.sqrt(target_area * r)))
        rows = int(round(math.sqrt(target_area / r)))
        if swap: cols,rows = rows,cols

        # This attempt does not fit in the image: try the next draw.
        if cols > width or rows > height: continue

        top = int((height-rows+1)*row_pct)
        left = int((width-cols+1)*col_pct)
        return x[:, top:top+rows, left:left+cols].contiguous()

    # Every attempt failed: take the largest centred square instead.
    side = min(height, width)
    top = (height - side) // 2
    left = (width - side) // 2
    return x[:, top:top+side, left:left+side].contiguous()

In [None]:
@reg_transform
def center_crop(x, b:uniform=0.5) -> TfmType.Pixel:
    """Return the largest centred square of `x`, indexed as (channel, row, column).

    `b` is part of the signature but is not read by the body.
    """
    height, width = x.size(1), x.size(2)
    side = min(height, width)
    top = (height - side) // 2
    left = (width - side) // 2
    return x[:, top:top+side, left:left+side].contiguous()

In [None]:
#random_resized_crop = crop_with_ratio_tfm(scale=(0.5,1.,10), ratio=(0.75,1.33,10),invert=(0.5,10),
#                                          row_pct=(0,1.), col_pct=(0,1.))
# Training-time crop built with `zoom_squish_tfm`, which is not defined in this notebook.
# NOTE(review): the 3-tuples look like (low, high, number of draws) and `invert` like
# (probability, number of draws) — confirm against the zoom_squish definition.
random_resized_crop = zoom_squish_tfm(scale=(0.5,1,10), squish=(0.75,1.33,10), invert=(0.5,10),
                                      row_pct=(0,1.), col_pct=(0,1.))
# Validation-time counterpart: the scale range is pinned to 1.1 and the squish range to 1,
# and no row_pct/col_pct are passed.
center_crop1 = zoom_squish_tfm(scale=(1.1,1.1,2), squish=(1,1,2), invert=(0.5,2))

In [None]:
# NOTE(review): `sz` is not read by the cells below, which pass `size=224` literally — keep in sync.
sz = 224
# Training pipeline: random crop, horizontal flip with p=0.5, then normalisation.
trn_tfms = [random_resized_crop,
            flip_lr_tfm(p=0.5),
            normalize_tfm(mean=data_mean,std=data_std)] #torchvision.transforms.RandomRotation(10),
# Validation pipeline: fixed-parameter crop followed by the same normalisation.
val_tfms = [center_crop1,
            normalize_tfm(mean=data_mean,std=data_std)]

In [None]:
#classes = ['airplanes','Motorbikes','Faces','watch','Leopards']
# Seed numpy's global RNG: `from_folder` draws the split with `np.random.uniform`.
np.random.seed(42)
# With test_pct > 0, `from_folder` returns a (train, held-out) pair; each file goes to the
# held-out split with probability 0.2.
train_ds,valid_ds = ImageDataset.from_folder(PATH, test_pct=0.2)
# Class list as stored on the training split.
classes = train_ds.classes

In [None]:
# Wrap each split with its transform list. `train_ds`/`valid_ds` are rebound here, so running
# this cell twice in a row would wrap the already wrapped datasets again.
# NOTE(review): `size=224` duplicates the `sz` constant defined earlier.
train_ds = TfmDataset(train_ds, trn_tfms, size=224)
valid_ds = TfmDataset(valid_ds, val_tfms, size=224)

In [None]:
# Smoke test: fetch the first item of each dataset to check that indexing works.
# Only the validation item remains bound to `x,y` afterwards.
x,y = train_ds[0]
x,y = valid_ds[0]

In [None]:
# Bundle both datasets into a DataBunch with bs=64 and num_workers=8.
data = DataBunch(train_ds, valid_ds, bs=64, num_workers=8)

In [None]:
def conv_layer(ni, nf, ks=3, stride=1):
    """Return a Conv2d -> BatchNorm2d -> LeakyReLU(0.1) stack as an `nn.Sequential`.

    The convolution maps `ni` to `nf` channels with a `ks` x `ks` kernel, the given
    `stride`, padding `ks//2` and no bias.
    """
    conv = nn.Conv2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding=ks//2)
    norm = nn.BatchNorm2d(nf)
    act = nn.LeakyReLU(negative_slope=0.1, inplace=True)
    return nn.Sequential(conv, norm, act)

class ResLayer(nn.Module):
    """Residual block: a 1x1 conv layer halving the channels, then a 3x3 conv layer restoring them."""
    def __init__(self, ni):
        super().__init__()
        mid = ni // 2
        self.conv1 = conv_layer(ni, mid, ks=1)
        self.conv2 = conv_layer(mid, ni, ks=3)

    def forward(self, x):
        "Add the output of the two conv layers to the input."
        branch = self.conv2(self.conv1(x))
        return x + branch

class Darknet(nn.Module):
    """Convolutional classifier assembled from `conv_layer` and `ResLayer` groups.

    `num_blocks` lists how many `ResLayer`s each group contains; every group doubles
    the channel count, starting from `nf`. Average pooling, `Flatten` and a linear
    layer with `num_classes` outputs form the head.
    """
    def make_group_layer(self, ch_in, num_blocks, stride=1):
        "One channel-doubling conv layer followed by `num_blocks` residual layers, as a list."
        ch_out = ch_in * 2
        group = [conv_layer(ch_in, ch_out, stride=stride)]
        for _ in range(num_blocks):
            group.append(ResLayer(ch_out))
        return group

    def __init__(self, num_blocks, num_classes, nf=32):
        super().__init__()
        layers = [conv_layer(3, nf, ks=3, stride=1)]
        for idx, n_res in enumerate(num_blocks):
            # Every group uses stride 2 except the second one (index 1), which uses stride 1.
            stride = 1 if idx == 1 else 2
            layers.extend(self.make_group_layer(nf, n_res, stride=stride))
            nf = nf * 2
        layers.extend([nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)])
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        "Run `x` through the whole layer stack."
        return self.layers(x)

In [None]:
# Six-group Darknet moved to the GPU with `.cuda()`.
# NOTE: `model` is reassigned with a five-group configuration in a later cell before a Learner is built.
model = Darknet([1, 2, 4, 6, 2, 1], num_classes=len(classes), nf=16).cuda()

In [None]:
class Recorder(Callback):
    """Callback that records learning rate, momentum, smoothed loss and validation metrics.

    `beta` is the coefficient of the exponential moving average applied to the training loss.
    """
    beta = 0.98

    def __init__(self, opt, train_dl=None):
        """Keep the optimizer wrapper (read for `lr`/`mom`) and, optionally, the training loader."""
        self.opt,self.train_dl = opt,train_dl

    def on_train_begin(self):
        "Reset the counters and every recorded list."
        self.epoch,self.n,self.avg_loss = 0,0,0.
        self.losses,self.val_losses,self.lrs,self.moms,self.metrics = [],[],[],[],[]

    def on_batch_begin(self, xb, yb):
        "Record the current `opt.lr` and `opt.mom`; the batch is returned unchanged."
        self.lrs.append(self.opt.lr)
        self.moms.append(self.opt.mom)
        return xb, yb

    def on_backward_begin(self, loss, out):
        "Update the bias-corrected moving average of the loss and append it to `losses`."
        #We record the loss here before any other callback has a chance to modify it.
        self.n += 1
        self.avg_loss = self.beta * self.avg_loss + (1-self.beta) * loss.item()
        # Dividing by (1 - beta**n) removes the bias caused by starting the average at 0.
        self.smooth_loss = self.avg_loss / (1 - self.beta ** self.n)
        self.losses.append(self.smooth_loss)
        if self.train_dl is not None and self.train_dl.progress_func is not None:
            self.train_dl.gen.set_postfix_str(self.smooth_loss)

    def on_epoch_end(self, val_metrics):
        "Store validation loss (first entry) and remaining metrics, print them, advance the epoch counter."
        if val_metrics is not None:
            self.val_losses.append(val_metrics[0])
            if len(val_metrics) > 1: self.metrics.append(val_metrics[1:])
            print(self.epoch, self.smooth_loss, *val_metrics)
        self.epoch += 1

    def plot_lr(self, show_moms=False):
        "Plot the recorded learning rates and, when `show_moms` is set, the momentums next to them."
        # Use this recorder's own history rather than reaching for a global `learn` object.
        iterations = list(range(len(self.lrs)))
        if show_moms:
            fig, axs = plt.subplots(1,2, figsize=(12,4))
            axs[0].plot(iterations, self.lrs)
            axs[1].plot(iterations, self.moms)
        else: plt.plot(iterations, self.lrs)

    def plot(self, skip_start=10, skip_end=5):
        "Plot smoothed loss against learning rate (log x-axis), dropping `skip_start`/`skip_end` points."
        # A `-0` slice bound would yield an empty list, hence the explicit branch on skip_end.
        lrs = self.lrs[skip_start:-skip_end] if skip_end > 0 else self.lrs[skip_start:]
        losses = self.losses[skip_start:-skip_end] if skip_end > 0 else self.losses[skip_start:]
        fig, ax = plt.subplots(1,1)
        ax.plot(lrs, losses)
        ax.set_xscale('log')

## First training: Adam with 1cycle

In [None]:
def accuracy(out, yb):
    "Fraction of rows of `out` whose highest-scoring index along dim 1 equals the target in `yb`."
    _, preds = torch.max(out, dim=1)
    hits = (preds == yb).float()
    return hits.mean()

In [None]:
from typing import Callable, List

In [None]:
@dataclass
class Learner():
    """Ties a model and its data to a loss function, an optimizer factory and metrics.

    The annotated fields act as class-level defaults: because `__init__` is written
    by hand it only takes `data` and `model`, and the other settings are changed by
    assigning attributes on the instance.
    """

    loss_fn: Callable = F.cross_entropy
    opt_fn: Callable = optim.SGD
    metrics: List = None

    def __init__(self, data, model):
        "Store `data` and move `model` to `data.device`."
        self.data,self.model = data,model.to(data.device)

    def fit(self, epochs, lr, wd=0, callbacks=None):
        """Train for `epochs` epochs with a freshly created optimizer and recorder.

        Args:
            epochs: number of epochs passed to the module-level `fit`.
            lr: initial learning rate given to `HPOptimizer`.
            wd: value assigned to `opt.wd`.
            callbacks: optional iterable of extra callbacks; it is left unmodified.
        """
        self.opt = HPOptimizer(self.model.parameters(), self.opt_fn, init_lr=lr)
        self.opt.wd = wd
        self.recorder = Recorder(self.opt, self.data.train_dl)
        # Build a new list with the recorder first: this accepts `callbacks=None` and
        # avoids inserting another recorder into the caller's list on every call.
        callbacks = [self.recorder] + list(callbacks or [])
        fit(epochs, self.model, self.loss_fn, self.opt, self.data, callbacks=callbacks, metrics=self.metrics)

    def lr_find(self, start_lr=1e-5, end_lr=10, num_it=200):
        "Fit with an `LRFinder` callback for enough epochs to cover `num_it` iterations."
        cb = LRFinder(self, start_lr, end_lr, num_it)
        # Smallest whole number of epochs containing at least `num_it` batches.
        n_epochs = int(np.ceil(num_it/len(self.data.train_dl)))
        self.fit(n_epochs, start_lr, callbacks=[cb])

In [None]:
#export
def loss_batch(model, xb, yb, loss_fn, opt=None, cb_handler=None, metrics=None):
    """Compute loss and metrics for one batch; take an optimizer step when `opt` is given.

    Returns a tuple `(loss value, *metric values, len(xb))`. The callback hooks only
    run when both `opt` and `cb_handler` are provided, and the loss is replaced by
    whatever `cb_handler.on_backward_begin` returns.
    """
    preds = model(xb)
    loss = loss_fn(preds, yb)
    metric_vals = []
    if metrics is not None:
        metric_vals = [metric(preds, yb).item() for metric in metrics]

    if opt is not None:
        use_cb = cb_handler is not None
        if use_cb: loss = cb_handler.on_backward_begin(loss, preds)
        loss.backward()
        if use_cb: cb_handler.on_backward_end()
        opt.step()
        if use_cb: cb_handler.on_step_end()
        opt.zero_grad()

    return (loss.item(), *metric_vals, len(xb))

In [None]:
#export
def fit(epochs, model, loss_fn, opt, data, callbacks=None, metrics=None):
    """Train `model` for up to `epochs` epochs, validating after each one when possible.

    Callback hooks are dispatched through a `CallbackHandler` built from `callbacks`.
    A truthy return from `on_batch_end` ends the current epoch's training loop, and a
    truthy return from `on_epoch_end` ends training. Validation values are averaged
    over batches, weighted by batch size.
    """
    handler = CallbackHandler(callbacks)
    handler.on_train_begin()

    for _ in tnrange(epochs):
        model.train()
        handler.on_epoch_begin()

        for xb,yb in data.train_dl:
            xb, yb = handler.on_batch_begin(xb, yb)
            loss,_ = loss_batch(model, xb, yb, loss_fn, opt, handler)
            if handler.on_batch_end(loss): break

        val_metrics = None
        valid_dl = getattr(data, 'valid_dl', None)
        if valid_dl is not None:
            model.eval()
            with torch.no_grad():
                # Each result is (loss, *metrics, batch size); transpose into per-quantity tuples.
                results = [loss_batch(model, vx, vy, loss_fn, metrics=metrics) for vx,vy in valid_dl]
                *per_metric,nums = zip(*results)
            total = np.sum(nums)
            val_metrics = [np.sum(np.multiply(vals,nums)) / total for vals in per_metric]

        if handler.on_epoch_end(val_metrics): break

    handler.on_train_end()

In [None]:
class TrueWD(Callback):
    """Callback applying weight decay directly to the parameters.

    When training begins it sets `learn.opt.wd` to 0; after every backward pass it
    multiplies each parameter by `1 - wd * lr` of its parameter group.
    """

    def __init__(self, learn, wd):
        self.learn = learn
        self.wd = wd

    def on_train_begin(self):
        "Grab the learner's optimizer wrapper and zero its own `wd` setting."
        self.opt = self.learn.opt
        self.opt.wd = 0

    def on_backward_end(self):
        "Shrink every parameter in place by its group's decay factor."
        for group in self.opt.opt.param_groups:
            factor = 1 - self.wd * group['lr']
            for param in group['params']:
                param.data.mul_(factor)

In [None]:
# Five-group Darknet; this replaces the `model` created in the earlier cell.
model = Darknet([1, 2, 4, 6, 3], num_classes=len(classes), nf=16).cuda()
# `Learner.__init__` only takes data and model, so the remaining settings are assigned as attributes.
learn = Learner(data, model)
learn.loss_fn = F.cross_entropy
learn.metrics = [accuracy]
# Adam with betas=(0.95, 0.99) instead of the class default `optim.SGD`.
learn.opt_fn = partial(optim.Adam, betas=(0.95,0.99))

In [None]:
# Run `Learner.lr_find` with its defaults (start_lr=1e-5, end_lr=10, num_it=200).
learn.lr_find()

In [None]:
# Plot recorded loss against learning rate on a log x-axis; by default the first 10 and
# last 5 recorded points are dropped.
learn.recorder.plot()

In [None]:
# Callbacks for this run: a `OneCycleScheduler` (defined outside this notebook) and `TrueWD` with wd=0.3.
# NOTE(review): presumably 4e-3 is the peak learning rate and 30 the number of epochs — confirm.
scheds = [OneCycleScheduler(learn, 4e-3, 30, div_factor=10, pct_end=0.1), TrueWD(learn, 0.3)]
# `TrueWD.on_train_begin` sets `opt.wd` to 0, so the `wd=1e-4` given here is presumably
# overridden as soon as training starts — verify that this is intended.
learn.fit(30, 2e-3, wd=1e-4, callbacks=scheds)

2e-3, 0.1, 76.4%
1e-3, 0.1, 76.3%
5e-4, 0.1, 76.5%
4e-3, 0.1, 77.8%

## With perspective warp

In [None]:
def find_coeffs(ori_pts, targ_pts):
    """Return the 8 coefficients of the perspective transform sending `targ_pts` to `ori_pts`.

    Both arguments are sequences of four 2-element points. Each pair of matching
    points contributes two rows to a linear system A X = B, which is solved with
    the pseudo-inverse (A^T A)^-1 A^T.
    """
    rows = []
    for targ, ori in zip(targ_pts, ori_pts):
        tx, ty = targ[0], targ[1]
        ox, oy = ori[0], ori[1]
        rows.append([tx, ty, 1, 0, 0, 0, -ox*tx, -ox*ty])
        rows.append([0, 0, 0, tx, ty, 1, -oy*tx, -oy*ty])

    A = FloatTensor(rows)
    B = FloatTensor(ori_pts).view(8)
    A_t = A.t()
    pseudo_inv = torch.mm(torch.inverse(torch.mm(A_t, A)), A_t)
    return torch.mv(pseudo_inv, B)

In [None]:
def add_ones(coords):
    "Flatten `coords` to N x 2 and append a column of ones, giving an N x 3 tensor."
    flat = coords.view(-1,2)
    ones_col = torch.ones(flat.size(0), 1)
    return torch.cat([flat, ones_col], 1)

In [None]:
def apply_perspective(coords, coeffs):
    """Apply the perspective transform given by 8 `coeffs` to `coords`.

    `coords` may have any shape whose last dimension is 2; the result has the same shape.
    """
    ori_size = coords.size()
    # Collapse all dims except the last one and append ones: points become N x 3.
    pts = add_ones(coords)
    # Append a 1 to the 8 coefficients to form a 3x3 matrix (the 1 ends up bottom right).
    mat = torch.cat([coeffs, FloatTensor([1])]).view(3,3)
    pts = torch.mm(pts, mat.t())
    # Divide every row by its third component to return to 2D coordinates.
    inv_w = 1/pts[:,2].unsqueeze(1)
    pts.mul_(inv_w)
    return pts[:,:2].view(ori_size)

In [None]:
@reg_transform
def perspective_warp(c, img_size, magnitude:uniform=0) -> TfmType.Coord:
    """Warp coordinates `c` by displacing the four corners of the [-1,1] square.

    `magnitude` is viewed as a 4 x 2 tensor: one (first, second) offset per corner.
    `img_size` is not read by the body.
    """
    offsets = magnitude.view(4,2)
    corners = [[-1,-1], [-1,1], [1,-1], [1,1]]
    moved = []
    for corner, offset in zip(corners, offsets):
        moved.append([value + delta for value, delta in zip(corner, offset)])
    coeffs = find_coeffs(corners, moved)
    return apply_perspective(c, coeffs)

In [None]:
def rand_int(low,high):
    """Return `random.randint(low, high)`.

    NOTE(review): `random` arrives through a star import; with the standard-library
    module both bounds are inclusive — confirm which module is in scope.
    """
    value = random.randint(low, high)
    return value

In [None]:
@reg_affine
def zoom(scale: uniform = 1.0, row_pct:uniform = 0.5, col_pct:uniform = 0.5) -> TfmType.Affine:
    """Return a 3x3 affine matrix scaling by `1/scale` with an offset set by `row_pct`/`col_pct`.

    With both percentages at 0.5 the offsets are zero.
    """
    inv_scale = 1/scale
    slack = 1 - inv_scale
    col_c = slack * (2*col_pct - 1)
    row_c = slack * (2*row_pct - 1)
    top = [inv_scale, 0, col_c]
    middle = [0, inv_scale, row_c]
    bottom = [0, 0, 1.]
    return [top, middle, bottom]

In [None]:
@reg_transform
def tilt(c, img_size, direction:rand_int, magnitude:uniform=0) -> TfmType.Coord:
    """Warp coordinates `c` by moving one pair of corners of the [-1,1] square.

    `direction` (0, 1, 2 or 3) selects which two corners move and along which
    coordinate; `magnitude` is the amount added to or subtracted from that
    coordinate. `img_size` is not read by the body.

    Raises:
        ValueError: if `direction` is not one of 0, 1, 2, 3.
    """
    ori_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
    # Directions 0/1 change the second coordinate of the corners whose first coordinate is 1 / -1.
    if direction == 0:   targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1+magnitude]]
    elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1+magnitude], [1,-1], [1,1]]
    # Directions 2/3 change the first coordinate of the corners whose second coordinate is 1 / -1.
    elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1+magnitude,1]]
    elif direction == 3: targ_pts = [[-1-magnitude,-1], [-1,1], [1+magnitude,-1], [1,1]]
    # Fail with a clear message instead of an UnboundLocalError on `targ_pts`.
    else: raise ValueError(f"direction must be 0, 1, 2 or 3, got {direction!r}")
    coeffs = find_coeffs(ori_pts, targ_pts)
    return apply_perspective(c, coeffs)

In [None]:
@reg_affine
def zoom1(scale: uniform = 1.0, row_pct:uniform = 0.5, col_pct:uniform = 0.5) -> TfmType.Affine:
    """Variant of `zoom` whose diagonal entries are `sqrt(scale)` instead of `1/scale`.

    The offsets are `(1 - sqrt(scale))` times `2*pct - 1` for the column and row percentages.
    """
    root = math.sqrt(scale)
    slack = 1 - root
    col_c = slack * (2*col_pct - 1)
    row_c = slack * (2*row_pct - 1)
    top = [root, 0, col_c]
    middle = [0, root, row_c]
    bottom = [0, 0, 1.]
    return [top, middle, bottom]

In [None]:
@reg_affine
def stretch(scale: uniform = 1.0) -> TfmType.Affine:
    "Return a 3x3 affine matrix with `sqrt(scale)` and `1/sqrt(scale)` on the diagonal and no offset."
    root = math.sqrt(scale)
    top = [root, 0, 0]
    middle = [0, 1/root, 0]
    bottom = [0, 0, 1.]
    return [top, middle, bottom]

In [None]:
# NOTE(review): `sz` is not read by the cells below, which pass `size=224` literally — keep in sync.
sz = 224
# Second training pipeline: stretch, zoom, horizontal flip, centre crop, normalisation.
# NOTE(review): the `*_tfm` names are presumably generated by the `reg_affine`/`reg_transform`
# decorators — confirm. `perspective_warp` and `tilt` defined above are not part of this list.
trn_tfms = [stretch_tfm(scale=(0.75,1.33)),
            zoom_tfm(scale=(0.08,0.8), row_pct=(0,1.), col_pct=(0,1.)),
            flip_lr_tfm(p=0.5),
            center_crop_tfm(b=(0,1)),
            normalize_tfm(mean=data_mean,std=data_std)] #torchvision.transforms.RandomRotation(10),
# Validation pipeline: centre crop followed by the same normalisation.
val_tfms = [center_crop_tfm(b=(0,1)),
            normalize_tfm(mean=data_mean,std=data_std)]

In [None]:
#classes = ['airplanes','Motorbikes','Faces','watch','Leopards']
# Re-seed numpy's global RNG with the same value as the first experiment before drawing the split again.
np.random.seed(42)
# Rebuild the raw (untransformed) datasets; each file goes to the held-out split with probability 0.2.
train_ds,valid_ds = ImageDataset.from_folder(PATH, test_pct=0.2)
classes = train_ds.classes

In [None]:
# Wrap the fresh splits with the second set of transform lists. The names are rebound, so
# running this cell twice in a row would wrap the already wrapped datasets again.
train_ds = TfmDataset(train_ds, trn_tfms, size=224)
valid_ds = TfmDataset(valid_ds, val_tfms, size=224)

In [None]:
# Smoke test: fetch the first item of each dataset to check that indexing works.
x,y = train_ds[0]
x,y = valid_ds[0]

In [None]:
# Rebuild the DataBunch from the newly wrapped datasets (bs=64, num_workers=8).
data = DataBunch(train_ds, valid_ds, bs=64, num_workers=8)

In [None]:
# Fresh model and Learner with the same settings as the first experiment.
model = Darknet([1, 2, 4, 6, 3], num_classes=len(classes), nf=16).cuda()
learn = Learner(data, model)
learn.loss_fn = F.cross_entropy
learn.metrics = [accuracy]
learn.opt_fn = partial(optim.Adam, betas=(0.95,0.99))

In [None]:
# Same schedule arguments as the first run, but `TrueWD` uses 0.1 here instead of 0.3.
scheds = [OneCycleScheduler(learn, 4e-3, 30, div_factor=10, pct_end=0.1), TrueWD(learn, 0.1)]
# `TrueWD.on_train_begin` sets `opt.wd` to 0, so the `wd=1e-4` given here is presumably
# overridden as soon as training starts — verify that this is intended.
learn.fit(30, 2e-3, wd=1e-4, callbacks=scheds)

In [None]:
# Display a batch from the training loader with `show_image_batch` (defined outside this notebook).
show_image_batch(data.train_dl, classes, rows=4)