<a href="https://colab.research.google.com/github/liuyao12/imagenette_experiments/blob/master/ResNet_separable.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet with depthwise separable convolution

> depthwise (x4) + Ranger + Mish + SA + MaxBlurPool + ResTrick

See summary at https://forums.fast.ai/t/imagenette-imagewoof-leaderboards/45822/47?u=liuyao 

# setup and imports

In [None]:
# pip install kornia

In [None]:
pip install git+https://github.com/ayasyrev/model_constructor

In [None]:
pip install git+https://github.com/ayasyrev/imagenette_experiments

In [3]:
from imagenette_experiments.train_utils import *

In [4]:
from kornia.contrib import MaxBlurPool2d

In [5]:
from fastai.basic_train import *
from fastai.vision import *
# from fastai.script import *
from model_constructor.net import Net, act_fn
from model_constructor.layers import SimpleSelfAttention, ConvLayer

# ResBlock

In [6]:
class NewLayer(nn.Sequential):
    """Conv block: convolution(s) followed by optional BatchNorm and activation.

    When ``ks == 3`` and ``groups == 1`` the convolution is built as a
    depthwise-separable pair: a 3x3 conv with ``groups=ni`` that expands
    ``ni`` to ``ni * 4`` channels, followed by a 1x1 conv down/up to ``nf``.
    Every other configuration builds a single ``nn.Conv2d``.

    Args:
        ni: number of input channels.
        nf: number of output channels.
        ks: kernel size.
        stride: stride of the single-conv path. The separable 3x3 path always
            uses stride 1 and ignores this value.
        act: append ``act_fn`` when True.
        act_fn: activation module appended when ``act`` is True.
        bn_layer: append ``nn.BatchNorm2d(nf)`` when True.
        bn_1st: when True BatchNorm comes before the activation, otherwise after.
        zero_bn: initialise the BatchNorm weight to 0 instead of 1.
        padding: conv padding; defaults to ``ks // 2``.
        bias: whether the convolutions carry a bias term.
        groups: groups of the single-conv path.
        **kwargs: accepted and ignored.
    """
    def __init__(self, ni, nf, ks=3, stride=1,
            act=True,  act_fn=nn.ReLU(inplace=True),
            bn_layer=True, bn_1st=True, zero_bn=False,
            padding=None, bias=False, groups=1, **kwargs):

        if padding is None:
            padding = ks // 2
        if ks == 3 and groups == 1:  # to be used for the "stem" of ResNet
            # Depthwise (x4 channel multiplier) + pointwise. Stride is fixed at 1
            # here on purpose; an explicit `padding` is honored (it used to be
            # overridden by a hard-coded 1, which equals the default ks // 2).
            layers = [('Conv3x3', nn.Conv2d(ni, ni*4, 3, stride=1, padding=padding, bias=bias, groups=ni)),
                      ('Conv1x1', nn.Conv2d(ni*4, nf, 1, bias=bias, groups=1))]
        else:
            layers = [('Conv{}x{}'.format(ks, ks),
                       nn.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]

        # Tail is built as [act, bn] and flipped to [bn, act] when bn_1st is set.
        tail = []
        if act:
            tail.append(('act_fn', act_fn))
        if bn_layer:
            bn = nn.BatchNorm2d(nf)
            nn.init.constant_(bn.weight, 0. if zero_bn else 1.)
            tail.append(('bn', bn))
        if bn_1st:
            tail.reverse()
        layers += tail
        super().__init__(OrderedDict(layers))

In [7]:
class NewResBlock(Module):
    """Residual block whose convolutions are all built with `NewLayer`.

    With ``expansion == 1`` the main path is two 3x3 layers. Otherwise it is
    a bottleneck: 1x1 (ni -> nh), grouped 3x3 with ``groups=nh`` (nh -> nh*4),
    then 1x1 (nh*4 -> nf). ``ni`` and ``nh`` are multiplied by ``expansion``
    to obtain the real input/output widths. When ``stride != 1`` the input is
    first passed through ``pool``; the convolutions themselves are unstrided.
    """
    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True):
        nf, ni = nh*expansion, ni*expansion
        # The `conv_layer` argument is accepted for interface compatibility but
        # is always replaced by NewLayer.
        conv_layer = NewLayer
        self.reduce = pool if stride != 1 else noop

        if expansion == 1:
            layers = [
                ("conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_layer=True)),
            ]
        else:
            layers = [
                ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                # grouped 3x3 with a x4 channel multiplier (stands in for a plain nh -> nh 3x3)
                ("conv_1", conv_layer(nh, nh*4, 3, groups=nh, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_2", conv_layer(nh*4, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)),
            ]
        if sa:
            layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(layers))

        # Shortcut: left untouched when widths match, otherwise a 1x1 layer without activation.
        if ni == nf:
            self.idconv = noop
        else:
            self.idconv = conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)
        self.merge = act_fn

    def forward(self, x):
        reduced = self.reduce(x)
        main = self.convs(reduced)
        shortcut = self.idconv(reduced)
        return self.merge(main + shortcut)

# Model Constructor

In [8]:
# Accumulator for the final accuracies collected by the result cells below.
res = []

# One MaxBlurPool2d instance, assigned as both `pool` and `stem_pool` of the model.
pool = MaxBlurPool2d(3, True)

# Constructor configured with c_out=10, layers [3, 6, 8, 3] and expansion 4.
model = Net(c_out=10, layers=[3, 6, 8, 3], expansion=4)
model.stem_sizes = [3, 32, 64, 64]
model.conv_layer = NewLayer  # for the stem
model.block = NewResBlock
model.pool = pool
model.stem_pool = pool
model.act_fn = Mish()
model.sa = True

# results

## size=128


### epochs=5, 5 runs

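no strided convolutions (`NewLayer` always uses stride 1 for its 3x3 separable convs; `NewResBlock` downsamples via pooling), bs=16, both act and bn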

In [9]:
# Start from an empty accumulator so the printed list and mean cover only the
# runs launched by this cell, even after a full top-to-bottom re-run.
res = []
for epochs in [5]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup is only enabled for schedules longer than 20 epochs
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric (the `accuracy` column of the log)
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.853887,1.693871,0.475185,0.900229,03:58
1,1.597855,1.421064,0.609824,0.941715,03:58
2,1.399992,1.231845,0.69916,0.966149,03:58
3,1.232522,1.124027,0.758463,0.966404,03:58
4,1.014952,1.00175,0.81522,0.981675,03:59


	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value)


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.82989,1.758396,0.432426,0.883431,03:58
1,1.528203,1.49581,0.57012,0.933571,03:58
2,1.343309,1.279568,0.694833,0.963095,03:58
3,1.255255,1.147663,0.739374,0.970221,03:59
4,1.041715,1.004069,0.814966,0.983202,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.86097,1.700778,0.465258,0.904556,03:58
1,1.538369,1.375021,0.638585,0.94477,03:59
2,1.367675,1.224978,0.705014,0.961568,03:59
3,1.180613,1.094188,0.775261,0.971749,03:58
4,0.987883,0.977362,0.826419,0.98142,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.841401,1.678582,0.477475,0.906847,03:58
1,1.492334,1.419619,0.627641,0.952914,03:59
2,1.348735,1.279629,0.688216,0.963604,03:58
3,1.239337,1.132548,0.759735,0.972258,03:59
4,1.00099,0.989038,0.825146,0.981675,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.778878,1.642192,0.494273,0.912955,03:59
1,1.534141,1.428439,0.608552,0.942734,03:58
2,1.349526,1.314455,0.668872,0.956477,03:58
3,1.221146,1.120293,0.757699,0.973021,03:58
4,1.0059,0.979379,0.824128,0.980657,03:59


[0.81522, 0.814966, 0.826419, 0.825146, 0.824128] 0.8211758852005004


### epochs=20, 5 runs

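no strided convolutions (`NewLayer` always uses stride 1 for its 3x3 separable convs; `NewResBlock` downsamples via pooling), bs=16, both act and bn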

In [9]:
# Start from an empty accumulator so the 5-epoch results from the previous
# cell do not leak into this cell's list and mean.
res = []
for epochs in [20]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup is only enabled for schedules longer than 20 epochs
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric (the `accuracy` column of the log)
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.801173,1.698625,0.481547,0.901502,03:58
1,1.522946,1.387382,0.615933,0.950369,03:58
2,1.317431,1.288623,0.673963,0.955714,03:58
3,1.227739,1.129893,0.753881,0.970221,03:59
4,1.124146,1.070666,0.784424,0.97633,03:59
5,1.114767,1.045305,0.792823,0.975566,03:59
6,1.050665,1.001708,0.811402,0.976584,03:59
7,0.972466,0.985769,0.814966,0.983711,03:59
8,0.937151,0.992524,0.819292,0.980402,03:59
9,0.907048,0.947411,0.835073,0.980148,03:59


	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value)


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.835402,1.751453,0.438534,0.890303,03:59
1,1.525845,1.374969,0.634004,0.949606,03:59
2,1.332278,1.198569,0.716213,0.966404,03:59
3,1.251647,1.178239,0.726903,0.965386,03:59
4,1.116337,1.071907,0.775515,0.972512,03:59
5,1.115612,1.07113,0.774243,0.972512,03:59
6,1.070621,1.021311,0.797149,0.980148,03:59
7,0.977338,0.975542,0.81802,0.978621,03:59
8,0.939719,0.970199,0.813693,0.977602,03:59
9,0.92381,0.915703,0.847035,0.984474,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.896753,1.734423,0.455078,0.906847,04:00
1,1.556807,1.403181,0.618223,0.946042,03:59
2,1.355759,1.273733,0.685925,0.955714,04:00
3,1.304199,1.166618,0.747518,0.966658,04:00
4,1.133599,1.094989,0.770171,0.97073,04:00
5,1.06229,1.069088,0.787732,0.969458,04:00
6,1.064758,1.01365,0.802494,0.979639,04:00
7,0.98438,0.957368,0.833291,0.982693,04:00
8,0.946529,0.996385,0.807076,0.975312,04:00
9,0.933666,0.947251,0.823619,0.980911,04:00


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.847284,1.775889,0.440825,0.892848,04:00
1,1.598621,1.442757,0.600153,0.949096,04:00
2,1.376522,1.265322,0.691779,0.956986,04:00
3,1.241434,1.172649,0.734793,0.962586,04:00
4,1.163452,1.083677,0.776533,0.975566,04:00
5,1.123032,1.051371,0.790023,0.975057,04:00
6,1.039444,1.001302,0.803767,0.97913,04:00
7,1.000067,0.971766,0.819292,0.982184,04:00
8,0.963111,1.012285,0.798167,0.975057,03:59
9,0.957597,0.925587,0.838381,0.982693,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.863371,1.890844,0.407228,0.878595,03:59
1,1.587108,1.468102,0.586663,0.942988,04:00
2,1.39958,1.308397,0.661491,0.957496,04:00
3,1.273798,1.182499,0.730466,0.966149,03:59
4,1.181075,1.14516,0.740392,0.969712,03:59
5,1.089572,1.120198,0.752609,0.973021,04:00
6,1.049326,1.013096,0.798422,0.979893,03:59
7,1.028194,1.039928,0.796131,0.974548,03:59
8,0.965961,0.990829,0.81242,0.976075,03:59
9,0.950209,0.971756,0.817765,0.980657,04:00


[0.890303, 0.88903, 0.890812, 0.890048, 0.886231] 0.8892848134040833


### epochs=80, 1 run

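no strided convolutions (`NewLayer` always uses stride 1 for its 3x3 separable convs; `NewResBlock` downsamples via pooling), bs=16, both act and bn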

In [None]:
# Start from an empty accumulator so shorter-schedule results from earlier
# cells do not leak into this cell's list and mean.
res = []
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # 80 > 20, so this run trains with mixup=0.2
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric (the `accuracy` column of the log)
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.964958,1.752915,0.441334,0.900229,04:00
1,1.756553,1.490821,0.57241,0.939425,04:00
2,1.594907,1.293326,0.657928,0.955205,04:00
3,1.462479,1.238182,0.70196,0.961568,04:00
4,1.442581,1.144526,0.743701,0.965131,04:00
5,1.359861,1.062154,0.784424,0.973785,04:00
6,1.335843,1.023307,0.798167,0.975566,04:00
7,1.270202,0.984475,0.81802,0.979893,04:01
8,1.26239,0.988003,0.819038,0.977857,04:00
9,1.215874,0.970355,0.822601,0.977602,04:00


## size=192

In [None]:
# Start from an empty accumulator so results from other sizes/schedules do
# not leak into this cell's list and mean.
res = []
for epochs in [5]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup is only enabled for schedules longer than 20 epochs
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# Start from an empty accumulator so results from other sizes/schedules do
# not leak into this cell's list and mean.
res = []
for epochs in [20]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup is only enabled for schedules longer than 20 epochs
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# Start from an empty accumulator so results from other sizes/schedules do
# not leak into this cell's list and mean.
res = []
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # 80 > 20, so this run trains with mixup=0.2
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# Single 200-epoch run at size 192; the accumulator is cleared first so the
# printed list and mean describe this run only.
res = []
for epochs in [200]:
    mixup = 0.2 if epochs > 20 else 0  # 200 > 20, so mixup=0.2
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    final_metric = learn.recorder.metrics[-1][0].item()  # last epoch, first recorded metric
    res.append(final_metric)
print([round(x, 6) for x in res], sum(res)/len(res))

## size=256

In [None]:
# Start from an empty accumulator so results from other sizes/schedules do
# not leak into this cell's list and mean.
res = []
for epochs in [5]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup is only enabled for schedules longer than 20 epochs
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# Start from an empty accumulator so results from other sizes/schedules do
# not leak into this cell's list and mean.
res = []
for epochs in [20]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup is only enabled for schedules longer than 20 epochs
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# Start from an empty accumulator so results from other sizes/schedules do
# not leak into this cell's list and mean.
res = []
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # 80 > 20, so this run trains with mixup=0.2
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    # last epoch, first recorded metric
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))