<a href="https://colab.research.google.com/github/liuyao12/imagenette_experiments/blob/master/ResNet_twist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet with a Twist

> with depthwise convolutions (x4 depth multiplier) + Ranger + Mish + SA + MaxBlurPool + ResTrick

See the blog post: https://liuyao12.github.io/blog/research/2020/03/07/Conv-Twist.html

See the summary on the fast.ai forums: https://forums.fast.ai/t/imagenette-imagewoof-leaderboards/45822/47?u=liuyao

## Imagewoof Leaderboard

Imagewoof2, with a 70/30 train/test ratio.

| Size (px) | Epochs | SoTA| x2 | x4 | x4 twist | x6 | x4 double | runs |
|--|--|--| --|--| --|--|--|--|
|128|5|73.37|75.19|76.27||76.61| **82.12**|5, mean
|128|20|85.52|85.18|86.22||86.27| **88.93**|5, mean
|128|80|87.20|87.70|87.83||87.65| **90.15**|1
|128|200|87.20|
|192|5|77.87|79.86|81.15|80.73|| **82.69**|5, mean
|192|20|87.85|88.12|88.37|88.28|
|192|80|89.21|90.30|89.89|89.38|| **92.08** |
|192|200|89.54
|256|5|
|256|20|
|256|80|
|256|200|


# Setup and Imports

In [None]:
# pip install kornia

In [None]:
pip install git+https://github.com/ayasyrev/model_constructor

In [None]:
pip install git+https://github.com/ayasyrev/imagenette_experiments

In [3]:
from fastai.basic_train import *
from fastai.vision import *
# from fastai.script import *

In [4]:
from kornia.contrib import MaxBlurPool2d

In [5]:
from imagenette_experiments.train_utils import *
from model_constructor.net import Net, act_fn
from model_constructor.layers import SimpleSelfAttention, ConvLayer

# ResBlock

In [32]:
class MnM(nn.Module):
    """Mix and Multiply: append coordinate-modulated "twist" channels.

    The input channels are read as consecutive groups of ``group_size``.
    The last two channels of every group are fed to a 1x1 grouped
    convolution (one group per selected channel, two outputs each, with
    bias).  Of each output pair, the first is multiplied by a horizontal
    coordinate grid ``X`` and the second by a vertical grid ``Y``; both
    grids run over ``[-1, 1)``.  The products originating from the two
    selected channels of a group are summed, so every group yields two new
    channels, which are concatenated after the unchanged input.

    Args:
        channels: channel count used to size the convolution and the grid;
            ``forward`` needs ``C // group_size == channels // group_size``.
        group_size: channels per group.  The slicing takes the last two
            channels of each group, so this should be at least 2.

    Shape:
        input:  ``(N, C, H, W)`` (must be viewable as ``(N, -1, group_size, H, W)``)
        output: ``(N, C + 2 * C // group_size, H, W)``
    """

    def __init__(self, channels, group_size):
        super().__init__()
        self.channels = channels
        self.gs = group_size
        n = channels // group_size * 2
        # groups == in_channels: every selected channel gets its own pair
        # of (weight, bias) outputs, one destined for X and one for Y.
        self.conv = nn.Conv2d(n, n * 2, 1, groups=n, bias=True)
        # Lazily built coordinate grid; see ``forward``.
        self.XY = None

    def _coord_grid(self, H, W, device, dtype):
        """Return the ``(2 * n, H, W)`` grid ``[X, Y, X, Y, ...]``."""
        # Computed in float64 first, then cast, so values are identical
        # regardless of the working dtype.
        xs = torch.arange(W, dtype=torch.float64) * 2 / W - 1
        ys = torch.arange(H, dtype=torch.float64) * 2 / H - 1
        XX = xs.view(1, 1, W).expand(1, H, W)
        YY = ys.view(1, H, 1).expand(1, H, W)
        pairs = self.channels // self.gs * 2
        return torch.cat([XX, YY] * pairs, dim=0).to(device=device, dtype=dtype)

    def forward(self, x):
        N, C, H, W = x.size()
        # Keep only the last two channels of each group of ``gs`` channels.
        x2 = x.view(N, -1, self.gs, H, W)[:, :, -2:].reshape(N, -1, H, W)
        grid = self.XY
        # The cached grid is only valid for the spatial size, device and
        # dtype it was built for; rebuild it when any of them changes
        # (e.g. a different image size, or mixed precision).
        if (grid is None
                or tuple(grid.shape[-2:]) != (H, W)
                or grid.device != x.device
                or grid.dtype != x.dtype):
            grid = self.XY = self._coord_grid(H, W, x.device, x.dtype)
        twist = self.conv(x2) * grid
        # (N, groups, selected channel, X|Y, H, W): sum over the two
        # selected channels, leaving one X- and one Y-channel per group.
        twist = torch.sum(twist.view(N, -1, 2, 2, H, W), dim=2).reshape(N, -1, H, W)
        return torch.cat([x, twist], dim=1)

In [36]:
class NewLayer(nn.Sequential):
    """Convolution followed by optional BatchNorm and activation.

    An ungrouped 3x3 convolution (``ks == 3 and groups == 1``) is replaced by
    a depthwise 3x3 convolution that widens ``ni`` to ``ni * depth_mult``
    channels, followed by a pointwise 1x1 convolution down/up to ``nf``.
    Every other configuration is a single ``ks x ks`` convolution.

    Args:
        ni: input channels.
        nf: output channels.
        ks: kernel size.
        stride: stride of the (first) convolution.
        act: append ``act_fn`` when true.
        act_fn: activation module.  The default instance is shared between
            all layers that do not override it.
        bn_layer: append a ``BatchNorm2d(nf)`` when true.
        bn_1st: order of the tail; BatchNorm then activation when true,
            activation then BatchNorm otherwise.
        zero_bn: initialise the BatchNorm weight to 0 instead of 1.
        padding: convolution padding; defaults to ``ks // 2``.
        bias: whether the convolutions carry a bias.
        groups: groups of the single-convolution variant.
        depth_mult: channel multiplier of the depthwise convolution.  When
            ``None`` the module-level ``dm`` is used, which must then be
            defined before the layer is constructed.
        **kwargs: accepted and ignored.
    """

    def __init__(self, ni, nf, ks=3, stride=1,
                 act=True, act_fn=nn.ReLU(inplace=True),
                 bn_layer=True, bn_1st=True, zero_bn=False,
                 padding=None, bias=False, groups=1, depth_mult=None, **kwargs):
        if padding is None:
            padding = ks // 2
        if ks == 3 and groups == 1:
            if depth_mult is None:
                depth_mult = dm  # notebook-level default
            mid = ni * depth_mult
            # Depthwise 3x3 (one group per input channel) + pointwise 1x1.
            # ``padding`` is honoured here as well; its default (1) matches
            # the value that used to be hard-coded.
            layers = [
                ('Conv3x3', nn.Conv2d(ni, mid, 3, stride=stride, padding=padding,
                                      bias=bias, groups=ni)),
                ('Conv1x1', nn.Conv2d(mid, nf, 1, bias=bias, groups=1)),
            ]
        else:
            layers = [
                ('Conv{}x{}'.format(ks, ks),
                 nn.Conv2d(ni, nf, ks, stride=stride, padding=padding,
                           bias=bias, groups=groups)),
            ]

        # Built as [act, bn] and reversed when BatchNorm should come first.
        act_bn = [('act_fn', act_fn)] if act else []
        if bn_layer:
            bn = nn.BatchNorm2d(nf)
            nn.init.constant_(bn.weight, 0. if zero_bn else 1.)
            act_bn.append(('bn', bn))
        if bn_1st:
            act_bn.reverse()
        layers += act_bn
        super().__init__(OrderedDict(layers))

In [35]:
class NewResBlock(Module):
    """Residual block built from ``NewLayer`` with an ``MnM`` twist stage.

    Down-sampling is done by applying ``pool`` to the input before both the
    convolution path and the identity path (no strided convolutions).

    * ``expansion == 1``: two 3x3 ``NewLayer`` stages.
    * otherwise: 1x1 reduce -> grouped 3x3 that widens ``nh`` to ``nh * dm``
      channels (no BatchNorm/activation) -> ``MnM`` (adds ``2 * nh`` twist
      channels) -> 1x1 projection to ``nf``.

    Args:
        expansion: channel expansion factor; ``ni`` and ``nh`` are given
            before expansion.
        ni: input channels divided by ``expansion``.
        nh: hidden channels; the block outputs ``nh * expansion`` channels.
        stride: any value other than 1 enables ``pool`` on the input.
        conv_layer: accepted for interface compatibility but overridden by
            ``NewLayer``.
        act_fn: activation used inside the block and after the residual sum.
        zero_bn: zero-initialise the last BatchNorm of the conv path.
        bn_1st: forwarded to the layers (BatchNorm before activation).
        pool: module applied to the input when ``stride != 1``.
        sa: append a ``SimpleSelfAttention`` layer to the conv path.
        sym: forwarded to ``SimpleSelfAttention``.
        groups: unused.

    Relies on the module-level depth multiplier ``dm`` being defined when
    the block is constructed.
    """
    # NOTE(review): there is no explicit super().__init__() call —
    # presumably ``Module`` is the fastai variant that runs it automatically;
    # confirm before swapping the base class for ``nn.Module``.

    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, groups=1):
        nf, ni = nh * expansion, ni * expansion
        conv_layer = NewLayer  # always use the layer defined in this notebook
        self.reduce = noop if stride == 1 else pool
        if expansion == 1:
            layers = [
                ("conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                # Consumes conv_0's output, hence ``nh`` input channels
                # (was ``ni``, which breaks whenever ni != nh).
                ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_layer=True)),
            ]
        else:
            layers = [
                ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                # Grouped 3x3: each of the nh channels fans out to dm channels.
                ("conv_1", conv_layer(nh, nh * dm, 3, groups=nh, act=False, bn_layer=False)),
                # Sized from the tensor it actually receives (nh * dm
                # channels), so it stays correct when dm != expansion.
                ("MnM", MnM(nh * dm, dm)),
                # nh * dm pass-through channels + 2 * nh twist channels.
                ("conv_2", conv_layer(nh * (dm + 2), nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)),
            ]
        if sa:
            layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(layers))
        # Project the shortcut only when the channel counts differ.
        self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)
        self.merge = act_fn

    def forward(self, x):
        o = self.reduce(x)
        return self.merge(self.convs(o) + self.idconv(o))

# Model Constructor

In [9]:
# Net constructor for the 10-class task, with every component replaced by
# the variants defined in this notebook.
model = Net(c_out=10, layers=[3, 6, 8, 3], expansion=4)
pool = MaxBlurPool2d(3, True)
for _attr, _value in (
    ("block", NewResBlock),
    ("conv_layer", NewLayer),  # used for the stem
    ("pool", pool),
    ("stem_sizes", [3, 32, 64, 64]),
    ("act_fn", Mish()),
    ("sa", True),
):
    setattr(model, _attr, _value)

## Experiment

In [38]:
dm = 4  # depth multiplier read by NewLayer / NewResBlock
res = {}  # epochs -> final accuracy of every run with that budget
for ep in [5, 5, 5, 5, 5, 20, 80]:
    # Mixup is only switched on for runs longer than 20 epochs.
    mixup = 0.2 if ep > 20 else 0
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(ep, lr=4e-3, moms=(0.95, 0.95), start_pct=0.72)
    # First metric of the last recorded epoch.
    acc = learn.recorder.metrics[-1][0].item()
    res.setdefault(ep, []).append(acc)
    print('{} epochs: {} ({} runs)'.format(ep, sum(res[ep]) / len(res[ep]), len(res[ep])))
print('depth multiplier={}'.format(dm), {k: sum(v) / len(v) for k, v in res.items()})

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.90554,1.814509,0.409519,0.880377,02:27
1,1.671987,1.491707,0.561721,0.935607,02:27
2,1.451532,1.29497,0.665309,0.95495,02:28
3,1.311999,1.187245,0.720031,0.964113,02:26
4,1.11996,1.02622,0.810639,0.978875,02:28


5 epochs: 0.8106388449668884 (1 runs)
data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.88251,1.744457,0.441079,0.902265,02:28
1,1.633589,1.500231,0.574701,0.931026,02:28
2,1.409354,1.397438,0.613642,0.951896,02:27
3,1.297217,1.203432,0.718758,0.963095,02:27
4,1.063921,1.034552,0.797149,0.976075,02:27


5 epochs: 0.8038941323757172 (2 runs)
data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.846126,1.748649,0.444897,0.896666,02:27
1,1.614222,1.568512,0.548231,0.930517,02:27
2,1.448857,1.347386,0.643166,0.953169,02:26
3,1.310275,1.211324,0.711886,0.96564,02:26
4,1.12051,1.031483,0.798931,0.981675,02:25




5 epochs: 0.8022397557894388 (3 runs)
data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.863362,1.717335,0.453041,0.895139,02:26
1,1.60484,1.443263,0.587936,0.941715,02:26
2,1.435408,1.284409,0.6704,0.959532,02:26
3,1.276258,1.154402,0.737847,0.968949,02:26
4,1.065301,1.009147,0.809875,0.977857,02:26




5 epochs: 0.8041486442089081 (4 runs)
data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.905647,1.734021,0.446933,0.899466,02:26
1,1.592445,1.469542,0.567574,0.941715,02:26




Buffered data was truncated after reaching the output size limit.

In [39]:
# Raw per-run accuracies, followed by the mean for each epoch budget.
print(res)
print('depth multiplier={}'.format(dm), {e: sum(accs) / len(accs) for e, accs in res.items()})

{5: [0.8106388449668884, 0.7971494197845459, 0.7989310026168823, 0.8098753094673157, 0.8088572025299072], 20: [0.8875032067298889], 80: [0.8946296572685242]}
depth multiplier=4 {5: 0.805090355873108, 20: 0.8875032067298889, 80: 0.8946296572685242}


In [19]:
# Single long run; results are added to the same ``res`` table as above.
for e in [200]:
    mixup = 0.2 if e > 20 else 0
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(e, lr=4e-3, moms=(0.95, 0.95), start_pct=0.72)
    acc = learn.recorder.metrics[-1][0].item()
    res.setdefault(e, []).append(acc)
    print('{} epochs: {} ({} runs)'.format(e, sum(res[e]) / len(res[e]), len(res[e])))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.028049,1.774158,0.426063,0.880886,02:50
1,1.813608,1.551716,0.544922,0.925426,02:50
2,1.717604,1.439224,0.607279,0.939679,02:50
3,1.6155,1.326114,0.659455,0.949096,02:50
4,1.551526,1.227806,0.700687,0.959277,02:50
5,1.520333,1.218766,0.706032,0.963349,02:50
6,1.489147,1.15319,0.742683,0.966913,02:49
7,1.444332,1.14111,0.743446,0.966149,02:49
8,1.384485,1.117875,0.753118,0.968949,02:49
9,1.392756,1.119168,0.750827,0.967931,02:49


KeyboardInterrupt: ignored