<a href="https://colab.research.google.com/github/liuyao12/imagenette_experiments/blob/master/ResNet_separable.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet with depthwise separable convolution

> depthwise (x4) + Ranger + Mish + SA + MaxBlurPool + ResTrick

See summary at https://forums.fast.ai/t/imagenette-imagewoof-leaderboards/45822/47?u=liuyao 

## Imagewoof Leaderboard

| Size (px) | Epochs | URL | Accuracy | # Runs |
|--|--|--|--|--|
|128|5|depthwise double|**82.12**|5, mean|
|128|20|depthwise double|**88.93**|5, mean|
|128|80|depthwise(x4, stem+body) etc.|90.22%|1|
|128|200|depthwise(x4, stem+body) etc.|90.71%|1|
|192|5|depthwise double|**82.69**|5, mean|
|192|20|depthwise(x4) etc.|88.37%|5, mean|
|192|80|depthwise(x4, stem+body) etc.|91.44%|1|
|192|200|depthwise(x4) etc.|90.32%|1|
|256|5|depthwise(x4) etc.|82.33%|5, mean|
|256|20|[Resnet Trick + Mish + Sa + MaxBlurPool](https://github.com/ayasyrev/imagenette_experiments/blob/master/Woof_MaxBlurPool_ResnetTrick_s256bs32.ipynb)|88.58%|5, mean|
|256|80|depthwise(x4) etc.|90.93%|1|
|256|200|[fastai2 train_imagenette.py 2020-01](https://github.com/fastai/imagenette/blob/master/2020-01-train.md)|90.38%|1|


# setup and imports

In [None]:
# pip install kornia

In [None]:
pip install git+https://github.com/ayasyrev/model_constructor

In [None]:
pip install git+https://github.com/ayasyrev/imagenette_experiments

In [3]:
from fastai.basic_train import *
from fastai.vision import *
# from fastai.script import *

In [4]:
from kornia.contrib import MaxBlurPool2d

In [5]:
from imagenette_experiments.train_utils import *
from model_constructor.net import Net, act_fn
from model_constructor.layers import SimpleSelfAttention, ConvLayer

# ResBlock

In [6]:
class NewLayer(nn.Sequential):
    """Conv block: convolution(s) followed by optional batch norm / activation.

    When ``ks == 3`` and ``groups == 1`` the convolution is built as a
    depthwise-separable pair: a 3x3 depthwise conv that widens ``ni`` to
    ``ni*4`` channels (``groups=ni``) followed by a 1x1 pointwise conv down
    to ``nf``. In that branch the ``stride`` and ``padding`` arguments are
    NOT used: the 3x3 conv is always created with ``stride=1, padding=1``.
    Any other configuration builds a single ``nn.Conv2d`` from the arguments.

    Args:
        ni: number of input channels.
        nf: number of output channels.
        ks: kernel size of the convolution.
        stride: stride of the single-conv variant (ignored by the
            depthwise-separable variant).
        act: append the activation module when true.
        act_fn: activation module to append. The default is one module
            instance created at definition time and reused by every layer
            that does not pass its own.
        bn_layer: append a ``BatchNorm2d(nf)`` when true.
        bn_1st: when true the order is (bn, act_fn); otherwise (act_fn, bn).
        zero_bn: initialise the batch-norm weight to 0 instead of 1.
        padding: padding of the single-conv variant; ``None`` means ``ks//2``.
        bias: whether the convolutions have a bias term.
        groups: groups of the single-conv variant.
        **kwargs: accepted for call compatibility and ignored.
    """
    def __init__(self, ni, nf, ks=3, stride=1,
            act=True,  act_fn=nn.ReLU(inplace=True),
            bn_layer=True, bn_1st=True, zero_bn=False,
            padding=None, bias=False, groups=1, **kwargs):

        # Identity check: `== None` would invoke __eq__ on whatever is passed.
        if padding is None:
            padding = ks // 2

        if ks == 3 and groups == 1:  # to be used for the "stem" of ResNet
            # Depthwise 3x3 (x4 channel multiplier) + pointwise 1x1.
            # NOTE(review): stride/padding are deliberately not forwarded here
            # (the experiment notes say "no stride") - confirm before reusing.
            layers = [('Conv3x3', nn.Conv2d(ni, ni*4, 3, stride=1, padding=1, bias=bias, groups=ni)),
                      ('Conv1x1', nn.Conv2d(ni*4, nf, 1, bias=bias, groups=1))]
        else:
            layers = [('Conv{}x{}'.format(ks, ks),
                       nn.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]

        # Built as (act_fn, bn) and flipped to (bn, act_fn) when bn_1st is set.
        act_bn = [('act_fn', act_fn)] if act else []
        if bn_layer:
            bn = nn.BatchNorm2d(nf)
            nn.init.constant_(bn.weight, 0. if zero_bn else 1.)
            act_bn += [('bn', bn)]
        if bn_1st:
            act_bn.reverse()
        layers += act_bn
        super().__init__(OrderedDict(layers))

In [10]:
class NewResBlock(Module):
    """Residual block whose convolutions are all built with `NewLayer`.

    The input is optionally pooled (when ``stride != 1``), passed through the
    conv branch and through the shortcut, the two results are summed and the
    activation is applied to the sum.

    With ``expansion == 1`` the conv branch is two 3x3 layers; otherwise it is
    1x1 -> grouped 3x3 (``groups=nh``, widening to ``nh*4``, no bn/act) -> 1x1.
    Note that the ``conv_layer`` argument is accepted but always replaced by
    `NewLayer`, and ``stride`` is only used to decide whether to pool.
    """
    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True):
        # Whatever was passed in, the block is built from NewLayer.
        conv_layer = NewLayer
        n_in = ni * expansion
        n_out = nh * expansion

        # Downsampling is done by pooling rather than by a strided conv.
        if stride == 1:
            self.reduce = noop
        else:
            self.reduce = pool

        if expansion == 1:
            branch = [
                ("conv_0", conv_layer(n_in, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_1", conv_layer(nh, n_out, 3, zero_bn=zero_bn, act=False, bn_layer=True)),
            ]
        else:
            branch = [
                ("conv_0", conv_layer(n_in, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                # Alternatives tried for conv_1:
                # ("conv_1", conv_layer(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                # ("conv_1", conv_layer(nh, nh*4, 3, groups=nh, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_1", conv_layer(nh, nh*4, 3, groups=nh, act=False, bn_layer=False)),
                ("conv_2", conv_layer(nh*4, n_out, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)),
            ]
        if sa:
            branch.append(('sa', SimpleSelfAttention(n_out, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(branch))

        # A 1x1 projection is only needed when the channel counts differ.
        if n_in == n_out:
            self.idconv = noop
        else:
            self.idconv = conv_layer(n_in, n_out, 1, act=False, bn_1st=bn_1st)
        self.merge = act_fn

    def forward(self, x):
        pooled = self.reduce(x)
        residual = self.convs(pooled)
        shortcut = self.idconv(pooled)
        return self.merge(residual + shortcut)

# Model Constructor

In [11]:
# Network constructor: 10 output classes, block counts [3,6,8,3], expansion 4.
model = Net(c_out=10, layers=[3,6,8,3], expansion=4)

# Swap in the custom building blocks defined above.
model.block = NewResBlock
model.conv_layer = NewLayer  # for the stem

# One MaxBlurPool2d instance is used for both the body and the stem pooling.
pool = MaxBlurPool2d(3, True)
model.pool = model.stem_pool = pool

# Stem widths, activation and self-attention switch.
model.stem_sizes = [3,32,64,64]
model.act_fn = Mish()
model.sa = True

# Final metric of each training run is collected here.
res = []

# results

## size=128


### epochs=5, 5 runs

no stride, bs=16, both act and bn

In [None]:
# Five 5-epoch runs at 128px; prints each run's final accuracy and their mean.
res = []  # reset so the mean covers only this experiment, not earlier cells
for epochs in [5]*5:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch (the accuracy column in the log)
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.853887,1.693871,0.475185,0.900229,03:58
1,1.597855,1.421064,0.609824,0.941715,03:58
2,1.399992,1.231845,0.69916,0.966149,03:58
3,1.232522,1.124027,0.758463,0.966404,03:58
4,1.014952,1.00175,0.81522,0.981675,03:59




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.82989,1.758396,0.432426,0.883431,03:58
1,1.528203,1.49581,0.57012,0.933571,03:58
2,1.343309,1.279568,0.694833,0.963095,03:58
3,1.255255,1.147663,0.739374,0.970221,03:59
4,1.041715,1.004069,0.814966,0.983202,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.86097,1.700778,0.465258,0.904556,03:58
1,1.538369,1.375021,0.638585,0.94477,03:59
2,1.367675,1.224978,0.705014,0.961568,03:59
3,1.180613,1.094188,0.775261,0.971749,03:58
4,0.987883,0.977362,0.826419,0.98142,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.841401,1.678582,0.477475,0.906847,03:58
1,1.492334,1.419619,0.627641,0.952914,03:59
2,1.348735,1.279629,0.688216,0.963604,03:58
3,1.239337,1.132548,0.759735,0.972258,03:59
4,1.00099,0.989038,0.825146,0.981675,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.778878,1.642192,0.494273,0.912955,03:59
1,1.534141,1.428439,0.608552,0.942734,03:58
2,1.349526,1.314455,0.668872,0.956477,03:58
3,1.221146,1.120293,0.757699,0.973021,03:58
4,1.0059,0.979379,0.824128,0.980657,03:59


[0.81522, 0.814966, 0.826419, 0.825146, 0.824128] 0.8211758852005004


### epochs=20, 5 runs

no stride, bs=16, both act and bn

In [None]:
# Five 20-epoch runs at 128px; prints each run's final metric and their mean.
res = []  # reset so the mean covers only this experiment, not earlier cells
for epochs in [20]*5:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.801173,1.698625,0.481547,0.901502,03:58
1,1.522946,1.387382,0.615933,0.950369,03:58
2,1.317431,1.288623,0.673963,0.955714,03:58
3,1.227739,1.129893,0.753881,0.970221,03:59
4,1.124146,1.070666,0.784424,0.97633,03:59
5,1.114767,1.045305,0.792823,0.975566,03:59
6,1.050665,1.001708,0.811402,0.976584,03:59
7,0.972466,0.985769,0.814966,0.983711,03:59
8,0.937151,0.992524,0.819292,0.980402,03:59
9,0.907048,0.947411,0.835073,0.980148,03:59




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.835402,1.751453,0.438534,0.890303,03:59
1,1.525845,1.374969,0.634004,0.949606,03:59
2,1.332278,1.198569,0.716213,0.966404,03:59
3,1.251647,1.178239,0.726903,0.965386,03:59
4,1.116337,1.071907,0.775515,0.972512,03:59
5,1.115612,1.07113,0.774243,0.972512,03:59
6,1.070621,1.021311,0.797149,0.980148,03:59
7,0.977338,0.975542,0.81802,0.978621,03:59
8,0.939719,0.970199,0.813693,0.977602,03:59
9,0.92381,0.915703,0.847035,0.984474,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.896753,1.734423,0.455078,0.906847,04:00
1,1.556807,1.403181,0.618223,0.946042,03:59
2,1.355759,1.273733,0.685925,0.955714,04:00
3,1.304199,1.166618,0.747518,0.966658,04:00
4,1.133599,1.094989,0.770171,0.97073,04:00
5,1.06229,1.069088,0.787732,0.969458,04:00
6,1.064758,1.01365,0.802494,0.979639,04:00
7,0.98438,0.957368,0.833291,0.982693,04:00
8,0.946529,0.996385,0.807076,0.975312,04:00
9,0.933666,0.947251,0.823619,0.980911,04:00


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.847284,1.775889,0.440825,0.892848,04:00
1,1.598621,1.442757,0.600153,0.949096,04:00
2,1.376522,1.265322,0.691779,0.956986,04:00
3,1.241434,1.172649,0.734793,0.962586,04:00
4,1.163452,1.083677,0.776533,0.975566,04:00
5,1.123032,1.051371,0.790023,0.975057,04:00
6,1.039444,1.001302,0.803767,0.97913,04:00
7,1.000067,0.971766,0.819292,0.982184,04:00
8,0.963111,1.012285,0.798167,0.975057,03:59
9,0.957597,0.925587,0.838381,0.982693,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.863371,1.890844,0.407228,0.878595,03:59
1,1.587108,1.468102,0.586663,0.942988,04:00
2,1.39958,1.308397,0.661491,0.957496,04:00
3,1.273798,1.182499,0.730466,0.966149,03:59
4,1.181075,1.14516,0.740392,0.969712,03:59
5,1.089572,1.120198,0.752609,0.973021,04:00
6,1.049326,1.013096,0.798422,0.979893,03:59
7,1.028194,1.039928,0.796131,0.974548,03:59
8,0.965961,0.990829,0.81242,0.976075,03:59
9,0.950209,0.971756,0.817765,0.980657,04:00


[0.890303, 0.88903, 0.890812, 0.890048, 0.886231] 0.8892848134040833


### epochs=80, 1 run

no stride, bs=16, both act and bn

In [None]:
# One 80-epoch run at 128px (mixup 0.2, since epochs > 20).
res = []  # reset so the printed result covers only this experiment
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

## size=192

### epochs=5, 5 runs

no stride, bs=16, act=False, bn=False

In [None]:
# Five 5-epoch runs at 192px; prints each run's final accuracy and their mean.
res = []  # reset so the mean covers only this experiment, not earlier cells
for epochs in [5]*5:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch (the accuracy column in the log)
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.856656,1.887479,0.400611,0.835836,07:11
1,1.61611,1.414912,0.620005,0.937898,07:11
2,1.392715,1.29101,0.676508,0.959786,07:11
3,1.269173,1.158866,0.738356,0.969203,07:11
4,1.050011,1.002696,0.822601,0.982438,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.844413,1.741399,0.445915,0.89692,07:11
1,1.561381,1.435193,0.601425,0.94197,07:11
2,1.398485,1.278045,0.679053,0.960041,07:11
3,1.272492,1.132732,0.754136,0.971494,07:11
4,1.058501,1.011058,0.813184,0.979384,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.845423,1.730929,0.465258,0.905319,07:11
1,1.53599,1.419719,0.618223,0.943497,07:10
2,1.377856,1.211514,0.715449,0.961059,07:11
3,1.239511,1.131984,0.749046,0.972003,07:11
4,1.064996,1.000053,0.813948,0.983202,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.850057,1.804008,0.418682,0.88674,07:10
1,1.552947,1.44266,0.597098,0.942479,07:11
2,1.384726,1.285309,0.679053,0.959023,07:10
3,1.249545,1.141736,0.743701,0.972258,07:11
4,1.064263,1.000224,0.822856,0.975566,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.851692,1.754275,0.448206,0.880122,07:10
1,1.57925,1.476122,0.5803,0.939679,07:11
2,1.406376,1.354866,0.640366,0.956477,07:11
3,1.256694,1.167863,0.746246,0.968694,07:11
4,1.02072,1.003255,0.817002,0.98142,07:11




[0.822601, 0.813184, 0.813948, 0.822856, 0.817002] 0.8179180502891541


no stride, bs=16, act=True, bn=False

In [None]:
# Five 5-epoch runs at 192px; prints each run's final accuracy and their mean.
res = []  # reset so the mean covers only this experiment, not earlier cells
for epochs in [5]*5:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch (the accuracy column in the log)
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.816367,1.742989,0.460932,0.905574,07:18
1,1.570518,1.470981,0.579537,0.946806,07:19
2,1.335072,1.309832,0.661237,0.957496,07:18
3,1.251946,1.125895,0.758972,0.973021,07:18
4,1.014469,0.983681,0.824637,0.981675,07:18




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.812879,1.758666,0.453041,0.890812,07:18
1,1.553075,1.417638,0.613642,0.945788,07:18
2,1.365519,1.28096,0.685671,0.955205,07:18
3,1.200144,1.240493,0.729702,0.970476,07:18
4,1.026837,0.98004,0.829473,0.981166,07:19




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.844059,1.720307,0.466785,0.901502,07:18
1,1.598753,1.470148,0.602698,0.942479,07:18
2,1.341149,1.255469,0.686689,0.963095,07:18
3,1.243953,1.122435,0.754645,0.969203,07:18
4,1.031597,0.99657,0.824637,0.98142,07:18




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.82138,1.678589,0.481802,0.910919,07:18
1,1.630918,1.482963,0.576228,0.937643,07:18
2,1.379747,1.222471,0.704505,0.96844,07:18
3,1.262058,1.147718,0.755663,0.968949,07:18
4,1.063779,0.986779,0.833291,0.982947,07:19




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.891106,1.707774,0.461441,0.902774,07:18
1,1.606799,1.477212,0.581064,0.935353,07:18
2,1.420447,1.298537,0.672436,0.959786,07:18
3,1.245861,1.184937,0.736829,0.969712,07:19
4,1.032215,0.994109,0.822601,0.978621,07:18




[0.824637, 0.829473, 0.824637, 0.833291, 0.822601] 0.8269279718399047


### epochs=20, 5 runs

no stride, bs=16, no act no bn

In [None]:
# Five 20-epoch runs at 192px; prints each run's final metric and their mean.
# NOTE(review): the description of this cell says bs=16 but the call passes
# bs=32 - confirm which one the reported numbers were produced with.
res = []  # reset so the mean covers only this experiment, not earlier cells
for epochs in [20]*5:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

### epochs=80, 1 run

no stride, bs=16, no act no bn

In [None]:
# One 80-epoch run at 192px (mixup 0.2, since epochs > 20).
# NOTE(review): the description of this cell says bs=16 but the call passes
# bs=32 - confirm which one the reported numbers were produced with.
res = []  # reset so the printed result covers only this experiment
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

### epochs=200, 1 run

no stride, bs=16, no act no bn

In [None]:
# One 200-epoch run at 192px, starting from an empty result list.
# NOTE(review): the description of this cell says bs=16 but the call passes
# bs=32 - confirm which one the reported numbers were produced with.
res = []
for epochs in [200]:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0.2 if epochs > 20 else 0
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

## size=256

In [None]:
# Five 5-epoch runs at 256px; prints each run's final metric and their mean.
res = []  # reset so the mean covers only this experiment, not earlier cells
for epochs in [5]*5:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# Five 20-epoch runs at 256px; prints each run's final metric and their mean.
res = []  # reset so the mean covers only this experiment, not earlier cells
for epochs in [20]*5:
    # mixup is only enabled for schedules longer than 20 epochs
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# One 80-epoch run at 256px (mixup 0.2, since epochs > 20).
res = []  # reset so the printed result covers only this experiment
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # first recorded metric of the last epoch
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))