<a href="https://colab.research.google.com/github/liuyao12/imagenette_experiments/blob/master/ResNet_twist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet with a Twist

> with depthwise (x4) + Ranger + Mish + SA + MaxBlurPool + ResTrick

See the blog post: https://liuyao12.github.io/blog/research/2020/03/07/Conv-Twist.html

See the summary at https://forums.fast.ai/t/imagenette-imagewoof-leaderboards/45822/47?u=liuyao

## Imagewoof Leaderboard

Imagewoof2, with a 70/30 train/test ratio.

| Size (px) | Epochs | SoTA| x2 | x4 | x4 twist | x6 | x4 double | runs |
|--|--|--| --|--| --|--|--|--|
|128|5|73.37|75.19|76.27||76.61| **82.12**|5, mean
|128|20|85.52|85.18|86.22||86.27| **88.93**|5, mean
|128|80|87.20|87.70|87.83||87.65| **90.15**|1
|128|200|87.20|
|192|5|77.87|79.86|81.15|80.73|| **82.69**|5, mean
|192|20|87.85|88.12|88.37|88.28|
|192|80|89.21|90.30|89.89|89.38|| **92.08** |
|192|200|89.54
|256|5|
|256|20|
|256|80|
|256|200|


# setup and imports

In [None]:
# pip install kornia

In [None]:
pip install git+https://github.com/ayasyrev/model_constructor

In [None]:
pip install git+https://github.com/ayasyrev/imagenette_experiments

In [4]:
from fastai.basic_train import *
from fastai.vision import *
# from fastai.script import *

In [5]:
from kornia.contrib import MaxBlurPool2d

In [6]:
from imagenette_experiments.train_utils import *
from model_constructor.net import Net, act_fn
from model_constructor.layers import SimpleSelfAttention, ConvLayer

# ResBlock

In [7]:
class MnM(nn.Module):  # Mix and Multiply
    """Append coordinate-modulated ("twist") feature maps to the input.

    The input channels are read as consecutive groups of ``group_size``
    channels. The last two channels of every group go through a grouped 1x1
    convolution (four outputs per selected channel), are multiplied by
    normalized x / y coordinate grids, summed in pairs, and concatenated to
    the unmodified input.

    Args:
        channels: number of channels of the tensor passed to ``forward``.
        group_size: size of each channel group; ``channels`` must be a
            multiple of it, and it must be at least 2 because two channels
            are taken from each group.

    Shape:
        input ``(N, channels, H, W)`` ->
        output ``(N, channels + 4 * channels // group_size, H, W)``.
    """

    def __init__(self, channels, group_size):
        super().__init__()
        self.channels = channels
        self.gs = group_size
        # Two channels are selected from every group.
        n = channels//group_size*2
        # One conv group per selected channel, four outputs each.
        self.conv = nn.Conv2d(n, n*4, 1, groups=n, bias=True)
        # Lazily built coordinate grid; see _coord_grid.
        self.XY = None

    def _coord_grid(self, x, H, W):
        """Return the (8 * channels // gs, H, W) grid matching ``x``.

        The grid is cached in ``self.XY`` and rebuilt whenever the spatial
        size, device or dtype of the input no longer matches the cached one
        (the cache used to be filled once and reused unconditionally, which
        broke on a change of resolution or device).
        """
        grid = self.XY
        stale = (
            grid is None
            or tuple(grid.shape[-2:]) != (H, W)
            or grid.device != x.device
            or grid.dtype != x.dtype
        )
        if stale:
            # Coordinates k*2/size - 1, i.e. values in [-1, 1), computed in
            # float64 and cast afterwards.
            xx = (torch.arange(W, dtype=torch.float64)*2/W - 1).expand(1, H, W)
            yy = (torch.arange(H, dtype=torch.float64)*2/H - 1).view(1, H, 1).expand(1, H, W)
            g = self.channels//self.gs*4
            # Channels alternate x-grid, y-grid, x-grid, ...
            grid = torch.cat([xx, yy]*g, dim=0).to(device=x.device, dtype=x.dtype)
            self.XY = grid
        return grid

    def forward(self, x):
        N, C, H, W = x.size()
        # x1 = x.view(N,-1,self.gs,H,W)[:,:,:-2].reshape(N,-1,H,W)
        # Last two channels of every group -> (N, 2*C/gs, H, W).
        x2 = x.view(N, -1, self.gs, H, W)[:, :, -2:].reshape(N, -1, H, W)
        twist = self.conv(x2)*self._coord_grid(x, H, W)
        # Of the four outputs per selected channel, add the two that share
        # the x grid and the two that share the y grid.
        twist = torch.sum(twist.view(N, -1, 2, 2, H, W), dim=2).reshape(N, -1, H, W)
        return torch.cat([x, twist], dim=1)

In [8]:
class NewLayer(nn.Sequential):
    """Conv block: convolution(s) plus optional BatchNorm and activation.

    With ``ks == 3`` and ``groups == 1`` the convolution is factored into a
    depthwise 3x3 conv producing ``ni * depth_mult`` channels followed by a
    1x1 conv down to ``nf``. In every other case a single ``nn.Conv2d`` with
    the given ``ks`` / ``groups`` is used.

    Args:
        ni: input channels.
        nf: output channels.
        ks: kernel size.
        stride: stride of the (first) convolution.
        act: append the activation ``act_fn`` when true.
        act_fn: activation module. The default instance is created once and
            shared by every layer that does not override it.
        bn_layer: append ``nn.BatchNorm2d(nf)`` when true.
        bn_1st: when true BatchNorm comes before the activation, otherwise
            after it.
        zero_bn: initialise the BatchNorm weight to 0 instead of 1.
        padding: convolution padding; defaults to ``ks // 2``.
        bias: whether the convolutions carry a bias.
        groups: conv groups; any value other than 1 disables the depthwise
            factorisation.
        depth_mult: channel multiplier of the depthwise conv. ``None`` (the
            default) falls back to the notebook-level global ``dm``.
        **kwargs: accepted and ignored.
    """
    def __init__(self, ni, nf, ks=3, stride=1,
                 act=True, act_fn=nn.ReLU(inplace=True),
                 bn_layer=True, bn_1st=True, zero_bn=False,
                 padding=None, bias=False, groups=1, depth_mult=None, **kwargs):

        if padding is None:
            padding = ks//2
        if ks == 3 and groups == 1:  # to be used for the "stem" of ResNet
            if depth_mult is None:
                # Global depth multiplier, looked up at construction time.
                depth_mult = dm
            mid = ni*depth_mult
            # `padding` is honoured here as well; it used to be hard-coded
            # to 1, which silently dropped an explicit argument.
            layers = [('Conv3x3', nn.Conv2d(ni, mid, 3, stride=stride, padding=padding, bias=bias, groups=ni)),
                      ('Conv1x1', nn.Conv2d(mid, nf, 1, bias=bias, groups=1))]
        else:
            layers = [('Conv{}x{}'.format(ks, ks),
                       nn.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]

        act_bn = [('act_fn', act_fn)] if act else []
        if bn_layer:
            bn = nn.BatchNorm2d(nf)
            nn.init.constant_(bn.weight, 0. if zero_bn else 1.)
            act_bn += [('bn', bn)]
        # Built as [act, bn]; reversing gives the bn-first ordering.
        if bn_1st:
            act_bn.reverse()
        layers += act_bn
        super().__init__(OrderedDict(layers))

In [13]:
class NewResBlock(Module):
    """Residual block built from ``NewLayer``, with an ``MnM`` stage in the
    bottleneck path.

    Args:
        expansion: channel expansion factor; ``1`` selects the two-conv
            basic path, anything else the bottleneck path.
        ni: input width before expansion (multiplied by ``expansion``).
        nh: hidden width; the block outputs ``nh * expansion`` channels.
        stride: any value other than 1 routes the input through ``pool``
            first. The convolutions themselves are never strided.
        conv_layer: accepted for interface compatibility; ``NewLayer`` is
            always used.
        act_fn: activation used inside the convs and after the residual sum.
        zero_bn: zero-initialise the BatchNorm weight of the last conv.
        bn_1st: passed through to ``NewLayer``.
        pool: module applied to the input when ``stride != 1``.
        sa: append ``SimpleSelfAttention`` to the conv path when true.
        sym: passed to ``SimpleSelfAttention``.
        groups: accepted for interface compatibility; unused.

    The bottleneck path reads the notebook-level global ``dm`` (depth
    multiplier) at construction time.
    """
    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, groups=1):
        nf, ni = nh*expansion, ni*expansion
        conv_layer = NewLayer  # the passed-in conv_layer is deliberately overridden
        self.reduce = noop if stride == 1 else pool
        if expansion == 1:
            layers = [
                ("conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                # conv_1 consumes conv_0's output, so its input width is nh.
                # (It used to be ni, which fails whenever ni != nh.)
                ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_layer=True)),
            ]
        else:
            layers = [
                ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                # ("conv_1", conv_layer(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_1", conv_layer(nh, nh*dm, 3, groups=nh, act_fn=act_fn, bn_1st=bn_1st)),
                # ("conv_1", conv_layer(nh, nh*dm, 3, groups=nh, act=False, bn_layer=False)),
                # MnM receives conv_1's nh*dm channels (equal to nf only when
                # dm == expansion) and appends 4*nh twist channels.
                ("MnM", MnM(nh*dm, dm)),
                ("conv_2", conv_layer(nh*(dm+4), nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)),
            ]
        if sa:
            layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(layers))
        # Project the shortcut with a 1x1 conv only when the widths differ.
        self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)
        self.merge = act_fn

    def forward(self, x):
        # Optional downsampling is shared by the conv path and the shortcut.
        o = self.reduce(x)
        return self.merge(self.convs(o) + self.idconv(o))

# Model Constructor

In [10]:
# Build the constructor with the ResNet layout, then swap in the custom
# block, stem conv layer, pooling, activation and self-attention settings.
pool = MaxBlurPool2d(3, True)
model = Net(c_out=10, layers=[3, 6, 8, 3], expansion=4)
for attr_name, attr_value in (
    ("block", NewResBlock),
    ("conv_layer", NewLayer),  # for the stem
    ("pool", pool),
    ("stem_sizes", [3, 32, 64, 64]),
    ("act_fn", Mish()),
    ("sa", True),
):
    setattr(model, attr_name, attr_value)

## Experiment

In [12]:
dm = 4  # depth multiplier, read by NewLayer / NewResBlock when the model is built
res = {}  # epochs -> list of final accuracies, one entry per run
for ep in [80]:  # *5 + [20] + [80]:
    # Mixup is only enabled for runs longer than 20 epochs.
    mixup = 0.2 if ep > 20 else 0
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(ep, lr=4e-3, moms=(0.95, 0.95), start_pct=0.72)
    # First recorded metric of the last epoch.
    acc = learn.recorder.metrics[-1][0].item()
    res.setdefault(ep, []).append(acc)
    runs = res[ep]
    print('{} epochs: {} ({} runs)'.format(ep, sum(runs)/len(runs), len(runs)))
print('depth multiplier={}'.format(dm), {ep: sum(accs)/len(accs) for ep, accs in res.items()})

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.00625,1.893326,0.39603,0.856197,03:30
1,1.772559,1.568457,0.529397,0.920845,03:29
2,1.663089,1.344732,0.636294,0.946297,03:30
3,1.557256,1.208704,0.715704,0.961313,03:30
4,1.44067,1.15695,0.744973,0.96564,03:30
5,1.406448,1.103513,0.760244,0.968949,03:30
6,1.327015,1.089078,0.764826,0.97124,03:30
7,1.270371,0.991219,0.814202,0.977093,03:31
8,1.28275,0.971349,0.814711,0.976584,03:30
9,1.213042,0.96096,0.820565,0.978621,03:30




80 epochs: 0.9002290368080139 (1 runs)
depth multiplier=4 {80: 0.9002290368080139}


In [14]:
# Repeat the run; accuracies accumulate in the existing `res` dict so the
# printed value is the mean over all runs so far.
for ep in [80]:  # *5 + [20] + [80]:
    # Mixup is only enabled for runs longer than 20 epochs.
    mixup = 0.2 if ep > 20 else 0
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(ep, lr=4e-3, moms=(0.95, 0.95), start_pct=0.72)
    # First recorded metric of the last epoch.
    acc = learn.recorder.metrics[-1][0].item()
    res.setdefault(ep, []).append(acc)
    runs = res[ep]
    print('{} epochs: {} ({} runs)'.format(ep, sum(runs)/len(runs), len(runs)))
print('depth multiplier={}'.format(dm), {ep: sum(accs)/len(accs) for ep, accs in res.items()})

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.998951,1.817244,0.413591,0.878341,03:32
1,1.75891,1.583864,0.528633,0.922372,03:31
2,1.636468,1.33483,0.657419,0.952914,03:31
3,1.563408,1.221549,0.712141,0.965895,03:31
4,1.456375,1.1578,0.743955,0.960295,03:31
5,1.43485,1.086016,0.769152,0.972258,03:31
6,1.314646,1.014699,0.805803,0.975821,03:31
7,1.313237,1.002816,0.808094,0.972512,03:31
8,1.275172,1.004361,0.801222,0.978112,03:31
9,1.221013,0.991231,0.81522,0.976584,03:30


80 epochs: 0.9006108343601227 (2 runs)
depth multiplier=4 {80: 0.9006108343601227}
