In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import warnings
from fastai.vision import *
from fastai.callbacks import *
from fastai.vision.models.xresnet import *
from fastai.vision.models.xresnet2 import *

In [4]:
# Fetch the Imagewoof 160px dataset via fastai and keep its local root folder in `path`.
path = untar_data(URLs.IMAGEWOOF_160)

In [5]:
(path/'train').ls()

[PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02093754'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02088364'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02089973'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02087394'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02111889'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02099601'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02105641'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02086240'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02115641'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/models'),
 PosixPath('/home/ubuntu/.fastai/data/imagewoof-160/train/n02096294')]

In [6]:
# Augmentation settings for training: flipping enabled, vertical flipping disabled,
# plus rotation / zoom / lighting / warp limits and how often each family is applied.
tfms = get_transforms(
    do_flip=True,
    flip_vert=False,
    max_rotate=15, max_zoom=1.3, max_lighting=0.3, max_warp=0.2,
    p_affine=0.5, p_lighting=0.5,
)

In [7]:
sz = 128
# Hold out 10% of the training images for validation. The split is seeded so the
# validation set (and therefore the monitored accuracy and any saved checkpoint)
# is the same after every kernel restart. Note: fastai seeds NumPy's global RNG here.
# normalize() is called without stats; the resulting `data.stats` are reused for the test set.
data = (ImageList.from_folder(path=path/'train')
        .random_split_by_pct(0.1, seed=42)
        .label_from_folder()
        .transform(tfms, size=sz)
        .databunch(bs=64)
        .normalize())

In [8]:
# Held-out test set: images from the dataset's `val` folder, loaded without
# augmentation (transform(None, ...)) at the same size as the training images and
# normalized with the training stats (`data.stats`).
# no_split() is used, so the items are read back through `test_data.train_ds`.
test_data = (ImageList.from_folder(path=path/'val')
            .no_split()
            .label_from_folder()
            .transform(None, size=sz)
            .databunch(bs=64)
            .normalize(data.stats))

In [9]:
# Attach the test images to `data` as its (unlabelled) test set; the true labels
# remain available in `test_data.train_ds.y` for scoring.
data.add_test(test_data.train_ds.x)

In [10]:
data

ImageDataBunch;

Train: LabelList (11209 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: CategoryList
n02093754,n02093754,n02093754,n02093754,n02093754
Path: /home/ubuntu/.fastai/data/imagewoof-160/train;

Valid: LabelList (1245 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: CategoryList
n02115641,n02105641,n02115641,n02096294,n02099601
Path: /home/ubuntu/.fastai/data/imagewoof-160/train;

Test: LabelList (500 items)
x: ImageList
Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128),Image (3, 128, 128)
y: EmptyLabelList
,,,,
Path: /home/ubuntu/.fastai/data/imagewoof-160/train

In [9]:
# data.show_batch()

### Baseline

In [11]:
# Architecture looked up by name on `models`; the name is also used for the CSV log file.
# NOTE(review): `arch` is reassigned later in the notebook, but the 'baseline'
# checkpoint name and the resnet34 log filename below are not updated with it.
model_name = 'resnet34'
arch = getattr(models, model_name)

# Callback instance passed to the Learner as-is.
learn_callbacks = [TerminateOnNaNCallback()]
# Callback factories, all monitoring validation accuracy (higher is better):
#   - EarlyStoppingCallback: patience of 5 epochs,
#   - SaveModelCallback: saves the best model under the name 'baseline',
#   - CSVLogger: writes the training log to `../logs/<model_name>`.
learn_callback_fns = [partial(EarlyStoppingCallback, monitor='accuracy', mode='max', patience=5),
                      partial(SaveModelCallback, monitor='accuracy', mode='max',
                              name='baseline'),
                      partial(CSVLogger, filename=f'../logs/{model_name}')]

### AlphaPooling

In [45]:
# Scratch cell: creates (or truncates) an empty `dummy.txt` in the current working directory.
with open("dummy.txt", "w") as f:
    f.write("")

In [12]:
# Toy float tensor of shape (3, 2, 5) in which every row is [1, 2, 3, 4, 5].
x1 = torch.arange(1, 6).repeat(3, 2, 1).float()

In [14]:
class AlphaPool(nn.Module):
    """Alpha-pooling layer with a learnable exponent.

    For a feature map of shape (batch, channels, h, w) it forms, at every spatial
    position, the outer product between the raw channel vector and a signed,
    power-compressed copy of it, averages those products over all positions and
    L2-normalizes the result. Output shape: (batch, channels * channels).

    The learnable parameter `self.alpha` is stored unconstrained; the exponent
    actually used is `sigmoid(self.alpha)`, which lies in (0, 1).

    Args:
        alpha: accepted for interface compatibility; currently unused (the raw
            parameter always starts at 0, i.e. an effective exponent of 0.5).
        eps: stored on the module as `self.eps`; the forward pass uses a fixed
            1e-5 offset instead.
    """
    def __init__(self, alpha:float=1., eps:float=1e-8):
        super().__init__()
        # Raw (pre-sigmoid) parameter. sigmoid(0) = 0.5 is the initial exponent.
        self.alpha = nn.Parameter(tensor([0.]))
        self.eps = eps

    def forward(self, x):
        "Creates alpha-pooling features from a CNN like feature map"
        # Squash without touching the stored parameter: an in-place sigmoid on
        # `.data` would overwrite the weight on every call (so repeated forwards,
        # even in eval mode, would drift) and would hide the sigmoid from autograd.
        alpha = torch.sigmoid(self.alpha)
        b,fn,h,w = x.shape
        x = x.view(b,fn,h*w)

        # Signed power compression; the small offset keeps the power/sqrt finite at 0.
        x1 = torch.sign(x)*torch.sqrt((torch.abs(x) + 1e-5)**alpha)
        x1 = x1.permute(0,2,1).contiguous().unsqueeze(2)    # (b, h*w, 1, fn)
        x2 = x.permute(0,2,1).contiguous().unsqueeze(3)     # (b, h*w, fn, 1)
        # Broadcasting yields one (fn, fn) outer product per spatial position.
        x = (x1*x2).view(b,h*w,-1)
        # Average over positions, then L2-normalize each sample's feature vector.
        x = F.normalize(x.mean(dim=1))
        return x

In [15]:
# Smoke test on a random batch shaped like a (32, 512, 4, 4) feature map:
# the pooled output has 512*512 = 262144 features per sample.
alpha_pool = AlphaPool()
x1 = torch.randn((32,512,4,4))
f = alpha_pool(x1); f.shape

torch.Size([32, 262144])

In [16]:
alpha_pool.alpha.grad

In [17]:
torch.isnan(f).sum()

tensor(0)

### Custom Head

In [18]:
from fastai.vision.learner import cnn_config

In [19]:
def create_head(nf:int, nc:int, lin_ftrs:Optional[Collection[int]]=None, ps:Floats=0.5,
                concat_pool:bool=True,alpha_pool:bool=True, bn_final:bool=False):
    """Build a classifier head: pooling, flatten, then one `bn_drop_lin` group per layer.

    `nf` is the number of features entering the first linear group and `nc` the
    number of outputs. Hidden widths come from `lin_ftrs` (a single 512-wide layer
    when it is None). `alpha_pool` takes precedence over `concat_pool` when
    choosing the pooling layer. With `bn_final`, a BatchNorm1d is appended.
    """
    sizes = [nf, 512, nc] if lin_ftrs is None else [nf] + lin_ftrs + [nc]
    n_hidden = len(sizes) - 2
    drops = listify(ps)
    if len(drops) == 1:
        # A single probability applies to the last group; earlier groups get half of it.
        drops = [drops[0]/2] * n_hidden + drops
    # The same ReLU instance is reused for every hidden group; the last group has none.
    activations = [nn.ReLU(inplace=True)] * n_hidden + [None]
    if alpha_pool:
        pool = AlphaPool()
    elif concat_pool:
        pool = AdaptiveConcatPool2d()
    else:
        pool = nn.AdaptiveAvgPool2d(1)
    layers = [pool, Flatten()]
    for n_in, n_out, drop, act in zip(sizes, sizes[1:], drops, activations):
        layers.extend(bn_drop_lin(n_in, n_out, True, drop, act))
    if bn_final:
        layers.append(nn.BatchNorm1d(sizes[-1], momentum=0.01))
    return nn.Sequential(*layers)

def create_cnn_model(base_arch:Callable, nc:int, cut:Union[int,Callable]=None, pretrained:bool=True,
        lin_ftrs:Optional[Collection[int]]=None, ps:Floats=0.5, custom_head:Optional[nn.Module]=None,
        split_on:Optional[SplitFuncOrIdxList]=None, bn_final:bool=False, concat_pool:bool=True,
        alpha_pool:bool=True):
    """Create custom convnet architecture: a body from `base_arch` followed by a head.

    When `custom_head` is None a head is built with `create_head`, and its input
    width is derived from the body's output feature count `n`:
      - `alpha_pool=True`: `n**2` (AlphaPool emits one feature per channel pair),
      - otherwise `2*n` with `concat_pool`, else `n`.
    `alpha_pool` defaults to True to match `create_head`'s default, and is forwarded
    to it so the pooling layer and the first linear layer always agree.
    When `custom_head` is given it is used unchanged. `split_on` is accepted but
    not used here.
    """
    body = create_body(base_arch, pretrained, cut)
    if custom_head is None:
        n_body = num_features_model(nn.Sequential(*body.children()))
        if alpha_pool: nf = n_body ** 2
        else: nf = n_body * (2 if concat_pool else 1)
        head = create_head(nf, nc, lin_ftrs, ps=ps, concat_pool=concat_pool,
                           alpha_pool=alpha_pool, bn_final=bn_final)
    else: head = custom_head
    return nn.Sequential(body, head)

def cnn_learner(data:DataBunch, base_arch:Callable, cut:Union[int,Callable]=None, pretrained:bool=True,
                lin_ftrs:Optional[Collection[int]]=None, ps:Floats=0.5, custom_head:Optional[nn.Module]=None,
                split_on:Optional[SplitFuncOrIdxList]=None, bn_final:bool=False, init=nn.init.kaiming_normal_,
                concat_pool:bool=True, **kwargs:Any)->Learner:
    """Build a `Learner` around a body/head convnet for `data`.

    The model comes from `create_cnn_model` with `data.c` outputs. Layer groups are
    split with `split_on`, falling back to the architecture's `cnn_config` entry.
    The learner is frozen when `pretrained` is set, and `init` (if truthy) is
    applied to the head (`model[1]`). Extra `kwargs` go to `Learner`.
    """
    arch_meta = cnn_config(base_arch)
    net = create_cnn_model(base_arch, data.c, cut, pretrained, lin_ftrs,
                           ps=ps, custom_head=custom_head, split_on=split_on,
                           bn_final=bn_final, concat_pool=concat_pool)
    learner = Learner(data, net, **kwargs)
    layer_groups = split_on or arch_meta['split']
    learner.split(layer_groups)
    if pretrained:
        learner.freeze()
    if init:
        apply_init(net[1], init)
    return learner

In [20]:
# Head sized for AlphaPool output: 512 body channels -> 512**2 pooled features.
# NOTE(review): 512 is hard-coded — confirm it matches the channel count of the body
# this head is attached to (AlphaPool emits channels**2 features).
custom_head = create_head(512**2, data.c)

In [21]:
custom_head

Sequential(
  (0): AlphaPool()
  (1): Flatten()
  (2): BatchNorm1d(262144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.25)
  (4): Linear(in_features=262144, out_features=512, bias=True)
  (5): ReLU(inplace)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.5)
  (8): Linear(in_features=512, out_features=10, bias=True)
)

### xresnet

In [38]:
from models import *

In [41]:
# Train from scratch (pretrained=False): xresnet50_sa body with the pre-built `custom_head`.
# The callbacks are the ones configured in the Baseline section, so the best model
# is still saved under the name 'baseline'.
arch = xresnet50_sa
learn = cnn_learner(data=data, 
                    custom_head=custom_head,
                    base_arch=arch,
                    pretrained=False, 
                    metrics=[accuracy],
                    callbacks=learn_callbacks,
                    callback_fns=learn_callback_fns)

In [42]:
learn.model

Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
    )
    (2): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
    )
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): ResBlock(
        (convs): Sequential(
          (0): Sequential(
            (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (

In [21]:
learn.loss_func

FlattenedLoss of CrossEntropyLoss()

In [20]:
learn.model[1]

Sequential(
  (0): AlphaPool()
  (1): Flatten()
  (2): BatchNorm1d(262144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.25)
  (4): Linear(in_features=262144, out_features=512, bias=True)
  (5): ReLU(inplace)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.5)
  (8): Linear(in_features=512, out_features=10, bias=True)
)

In [21]:
# learn.lr_find()
# learn.recorder.plot()

In [22]:
# learn.mixup(0.2)
# learn.to_fp16()
# learn.loss_func = LabelSmoothingCrossEntropy()

In [23]:
# One-cycle training for up to 50 epochs with max_lr=3e-3; the early-stopping
# callback configured earlier may end the run sooner.
learn.fit_one_cycle(50, max_lr=3e-3)

epoch,train_loss,valid_loss,accuracy,time
0,2.356452,2.053446,0.229719,01:28
1,2.251803,2.046087,0.257831,01:25
2,2.139973,2.024593,0.275502,01:26
3,2.034861,3.12932,0.166265,02:16
4,1.951767,2.038339,0.289157,03:06


Better model found at epoch 0 with accuracy value: 0.229718878865242.
Better model found at epoch 1 with accuracy value: 0.257831335067749.
Better model found at epoch 2 with accuracy value: 0.2755019962787628.
Better model found at epoch 4 with accuracy value: 0.28915661573410034.


Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/fastai/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/ubuntu/anaconda3/envs/fastai/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/ubuntu/anaconda3/envs/fastai/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/ubuntu/anaconda3/envs/fastai/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/fastai/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/home/ubuntu/anaconda3/envs/fastai/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/ubuntu/anaconda3/envs/fast

KeyboardInterrupt: 

In [None]:
# Clear the Learner's callback instances before scoring on the test set.
learn.callbacks = []
def _print_top1(scores):
    "Print top-1 accuracy of per-class `scores` against the labels in `test_data.train_ds.y`."
    test_preds = to_np(torch.argmax(scores, 1))
    test_labels = test_data.train_ds.y.items
    # A length mismatch would otherwise make the elementwise comparison meaningless.
    if len(test_labels) != len(test_preds):
        raise ValueError(f"got {len(test_preds)} predictions for {len(test_labels)} test labels")
    print(f"top1 acc: {np.mean(test_labels == test_preds)}")

def TTA_score(load_pth='baseline'):
    "Load the checkpoint `load_pth` and print top-1 test accuracy using test-time augmentation."
    learn.load(load_pth)
    preds = learn.TTA(ds_type=DatasetType.Test)
    _print_top1(preds[0])

def non_TTA_score(load_pth='baseline'):
    "Load the checkpoint `load_pth` and print top-1 test accuracy from plain predictions."
    learn.load(load_pth)
    preds = learn.get_preds(ds_type=DatasetType.Test)
    _print_top1(preds[0])

In [None]:
non_TTA_score()

In [None]:
TTA_score()

### Bag of Tricks 

https://arxiv.org/pdf/1812.01187.pdf

#### 1. Large batch 

**Large Batch Size Training**

- Scale the learning rate linearly with the batch size: $lr_{new} = lr \times bs_{new}/bs_{old}$

**Warmup with first m batches**

- Linearly increase the learning rate from 0 to lr over the first m batches

**Set $\gamma$ = 0 in BN layers in ResBlocks**

- Each residual block initially outputs only its shortcut, which mimics a network with fewer layers at the beginning of training

**No wd (L2 reg) in bias, or BN params $\gamma, \beta$**

#### 2. Low Precision

**FP16 Training**

#### 3. ResNet Tweaks

ResNetB, ResNetC, ResNetD...

#### 4. Cosine Annealing LR

#### 5. Label Smoothing

LabelSmoothingCrossEntropy()

#### 6. Student Teacher

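$\ell(p,\mathrm{softmax}(z)) + T^{2}\,\ell(\mathrm{softmax}(r/T),\mathrm{softmax}(z/T))$

where $\ell$ is the cross-entropy loss, $p$ the true label distribution, $z$ and $r$ the student and teacher outputs, and $T$ the temperature.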

#### 7. Mixup Training