<a href="https://colab.research.google.com/github/liuyao12/imagenette_experiments/blob/master/ResNet_separable.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ResNet with depthwise separable convolution

> depthwise (x4) + Ranger + Mish + SA + MaxBlurPool + ResTrick

See summary at https://forums.fast.ai/t/imagenette-imagewoof-leaderboards/45822/47?u=liuyao 

## Imagewoof Leaderboard

Imagewoof2, with a 70/30 train/test ratio.

| Size (px) | Epochs | URL | Accuracy | # Runs |
|--|--|--|--|--|
|128|5|depthwise (x4, double)|**82.12**|5, mean|
|128|20|depthwise (x4, double)|**88.93**|5, mean|
|128|80|depthwise (x4, double)|**90.15**|1|
|128|200|depthwise(x4, stem+body) etc.|90.71%|1|
|192|5|depthwise (x4, double)|**82.69**|5, mean|
|192|20|depthwise(x4) etc.|88.37%|5, mean|
|192|80|depthwise (x4, double)|**92.08**|1|
|192|200|depthwise(x4) etc.|90.32%|1|
|256|5|depthwise(x4) etc.|82.33%|5, mean|
|256|20|[Resnet Trick + Mish + Sa + MaxBlurPool](https://github.com/ayasyrev/imagenette_experiments/blob/master/Woof_MaxBlurPool_ResnetTrick_s256bs32.ipynb)|88.58%|5, mean|
|256|80|depthwise(x4) etc.|90.93%|1|
|256|200|[fastai2 train_imagenette.py 2020-01](https://github.com/fastai/imagenette/blob/master/2020-01-train.md)|90.38%|1|


# setup and imports

In [None]:
# pip install kornia

In [None]:
pip install git+https://github.com/ayasyrev/model_constructor

In [None]:
pip install git+https://github.com/ayasyrev/imagenette_experiments

In [4]:
from fastai.basic_train import *
from fastai.vision import *
# from fastai.script import *

In [5]:
from kornia.contrib import MaxBlurPool2d

In [6]:
from imagenette_experiments.train_utils import *
from model_constructor.net import Net, act_fn
from model_constructor.layers import SimpleSelfAttention, ConvLayer

# ResBlock

In [7]:
class NewLayer(nn.Sequential):
    """Conv block: convolution(s) followed by optional batch norm and activation.

    With ``ks == 3`` and ``groups == 1`` (the configuration used for the
    ResNet stem) the convolution is depthwise separable: a 3x3 depthwise
    convolution named ``Conv3x3`` that expands every input channel to ``dm``
    channels, followed by a 1x1 pointwise convolution named ``Conv1x1`` down
    to ``nf`` channels.  ``dm`` is the module-level depth multiplier and must
    be defined before such a layer is constructed.  Every other configuration
    yields a single plain convolution named ``Conv{ks}x{ks}``.

    Args:
        ni: number of input channels.
        nf: number of output channels.
        ks: kernel size.
        stride: stride of the (first) convolution.
        act: append ``act_fn`` when true.
        act_fn: activation module to append (registered as ``act_fn``).
        bn_layer: append ``nn.BatchNorm2d(nf)`` (registered as ``bn``) when true.
        bn_1st: place batch norm before the activation when true, after it otherwise.
        zero_bn: initialise the batch-norm weight to 0 instead of 1.
        padding: padding of the ``ks`` x ``ks`` convolution; defaults to ``ks // 2``.
        bias: whether the convolutions carry a bias term.
        groups: ``groups`` of the plain convolution.
        **kwargs: accepted and ignored.
    """
    def __init__(self, ni, nf, ks=3, stride=1,
            act=True,  act_fn=nn.ReLU(inplace=True),
            bn_layer=True, bn_1st=True, zero_bn=False,
            padding=None, bias=False, groups=1, **kwargs):

        if padding is None:
            padding = ks // 2
        if ks == 3 and groups == 1:  # to be used for the "stem" of ResNet
            # if ni==3: stride = 2
            # Honour the resolved padding (previously hard-coded to 1, which
            # silently discarded an explicit ``padding`` argument).
            layers = [('Conv3x3', nn.Conv2d(ni, ni*dm, 3, stride=stride, padding=padding, bias=bias, groups=ni)),
                      ('Conv1x1', nn.Conv2d(ni*dm, nf, 1, bias=bias, groups=1))]
        else:
            layers = [('Conv{}x{}'.format(ks, ks),
                       nn.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]

        # Built as [act, bn] and reversed to [bn, act] when bn comes first.
        act_bn = [('act_fn', act_fn)] if act else []
        if bn_layer:
            bn = nn.BatchNorm2d(nf)
            nn.init.constant_(bn.weight, 0. if zero_bn else 1.)
            act_bn += [('bn', bn)]
        if bn_1st:
            act_bn.reverse()
        layers += act_bn
        super().__init__(OrderedDict(layers))

In [8]:
class NewResBlock(Module):
    """Residual block whose 3x3 convolution is depthwise with multiplier ``dm``.

    The block computes ``merge(convs(r) + idconv(r))`` where ``r`` is the
    input after ``reduce``.  ``reduce`` is ``pool`` when ``stride != 1`` and a
    no-op otherwise; the convolutions themselves are built without a stride.

    Args:
        expansion: channel expansion factor; ``1`` selects the single-conv
            branch, anything else the three-conv (1x1, depthwise 3x3, 1x1) branch.
        ni: input width before multiplication by ``expansion``.
        nh: hidden width; the output width is ``nh * expansion``.
        stride: only decides whether ``pool`` is applied to the input.
        conv_layer: accepted for interface compatibility but ignored;
            ``NewLayer`` is always used.
        act_fn: activation used inside the branch and as the final ``merge``.
        zero_bn: zero-initialise the batch norm of the last convolution.
        bn_1st: forwarded to the conv layers.
        pool: module applied to the input when ``stride != 1``.
        sa: append a ``SimpleSelfAttention`` layer to the branch when true.
        sym: forwarded to ``SimpleSelfAttention``.
        groups: unused.
    """
    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, groups=1):
        nf, ni = nh*expansion, ni*expansion
        conv_layer = NewLayer  # the block always builds its convs from NewLayer
        self.reduce = pool if stride != 1 else noop

        if expansion == 1:
            branch = [
                ("conv_1", conv_layer(ni, nf, 3, zero_bn=zero_bn, act=False, bn_layer=True)),
            ]
        else:
            # 1x1 reduce -> depthwise 3x3 (no act, no bn) -> 1x1 expand.
            branch = [
                ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                ("conv_1", conv_layer(nh, nh*dm, 3, groups=nh, act=False, bn_layer=False)),
                ("conv_2", conv_layer(nh*dm, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)),
            ]
        if sa:
            branch.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(branch))

        # A 1x1 projection on the shortcut is needed only when the widths differ.
        if ni == nf:
            self.idconv = noop
        else:
            self.idconv = conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)
        self.merge = act_fn

    def forward(self, x):
        reduced = self.reduce(x)
        main = self.convs(reduced)
        shortcut = self.idconv(reduced)
        return self.merge(main + shortcut)

# Model Constructor

In [9]:
# Depth multiplier read by NewLayer / NewResBlock when their convolutions are built.
dm = 8

# Configure the network constructor: 10 outputs, [3,6,8,3] blocks, expansion 4.
model = Net(c_out=10, layers=[3, 6, 8, 3], expansion=4)
model.block, model.conv_layer = NewResBlock, NewLayer  # conv_layer is for the stem
pool = MaxBlurPool2d(3, True)
model.pool = pool
# model.stem_pool = pool
model.stem_sizes = [3, 32, 64, 64]
model.act_fn = Mish()
model.sa = True

# Final accuracies appended by the experiment cells.
res = []

## Experiment

Start with a model that uses `*2` in all the depthwise convolutions (with the current code, that means `dm = 2`).

In [93]:
# Baseline run at 192px; mixup is enabled (0.2) only for runs longer than 20 epochs.
epochs = 60
if epochs <= 20:
    mixup = 0
else:
    mixup = 0.2
learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
learn.fit_fc(epochs, lr=4e-3, moms=(0.95, 0.95), start_pct=0.72)
# Accuracy (the first recorded metric) of the final epoch.
print(learn.recorder.metrics[-1][0].item())

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.052636,1.938801,0.348944,0.834563,02:18
1,1.828844,1.628165,0.515398,0.9127,02:18
2,1.67696,1.429941,0.609061,0.935607,02:17
3,1.578073,1.276244,0.681344,0.958005,02:17
4,1.481553,1.214398,0.70425,0.957496,02:18
5,1.416507,1.187612,0.723339,0.96564,02:18
6,1.383856,1.080393,0.768389,0.967931,02:17
7,1.324159,1.050738,0.783405,0.973276,02:17
8,1.302396,1.006987,0.802749,0.975821,02:17
9,1.252993,0.984219,0.808603,0.976839,02:17




0.8979384303092957


In [94]:
# Evaluate the trained baseline; this is the reference to compare against after doubling.
learn.validate()



[0.77321756, tensor(0.8979), tensor(0.9855)]

In [95]:
# Widen every depthwise stage from multiplier m to 2*m without changing what
# the network computes: duplicate each output channel of a 'Conv3x3' and the
# matching input channel of the 'Conv1x1' that consumes it, scaling both by
# 1/sqrt(2), so each duplicated pair contributes
# 2 * (w/sqrt(2)) * (x/sqrt(2)) = w * x.
# This relies on nothing non-linear sitting between the two convolutions.
state = learn.model.state_dict()
root2 = 2 ** 0.5  # exact; the rounded 1.414 left a ~0.03% gain error per stage
channels = 0      # out-channels of the Conv3x3 awaiting its Conv1x1, 0 when none
for name in state:
    if 'Conv3x3' in name and channels==0:
        channels = state[name].size(0)
        # Copies are interleaved ([w0, w0, w1, w1, ...]) so they stay in their group.
        state[name] = state[name].repeat_interleave(2, dim=0) / root2
    if 'Conv1x1' in name and state[name].size()[1]==channels:
        state[name] = state[name].repeat_interleave(2, dim=1) / root2
        channels = 0
print('done doubling')

done doubling


Here, manually change the `*2` to `*4` in the four places it appears (with the current code, set `dm = 4`), rerun the cell that builds `model`, and proceed below:

In [99]:
# Build a learner for the widened model, load the doubled weights and validate.
epochs = 10
if epochs <= 20:
    mixup = 0
else:
    mixup = 0.2
learn2 = get_learn(model=model, size=192, bs=16, mixup=mixup)
learn2.model.load_state_dict(state)
# The result should match the baseline's validation numbers.
print(learn2.validate())

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


[0.7732239, tensor(0.8979), tensor(0.9855)]


In [100]:
# Continue training the widened model for 10 epochs, then print the
# final-epoch accuracy (first recorded metric).
learn2.fit_fc(10, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
print(learn2.recorder.metrics[-1][0].item())

epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,0.606666,0.875958,0.854416,0.978621,02:36
1,0.637473,0.875326,0.849835,0.977093,02:36
2,0.619375,0.890647,0.856707,0.975057,02:36
3,0.620187,0.899035,0.851107,0.979639,02:36
4,0.611394,0.852904,0.869941,0.979639,02:36
5,0.639448,0.892186,0.853907,0.97913,02:36
6,0.632713,0.866875,0.863324,0.975057,02:36
7,0.603388,0.837288,0.869687,0.98422,02:35
8,0.566116,0.818019,0.878595,0.981166,02:36
9,0.539309,0.791606,0.888012,0.982693,02:35




0.8880122303962708


In [1]:
# Needs `learn` from the baseline cell; in a fresh kernel this raises NameError.
learn.validate()

NameError: ignored

In [87]:
# Train the widened model for 40 epochs with the same schedule settings.
learn2.fit_fc(40, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)

epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,0.665684,0.900977,0.847289,0.977348,02:38
1,0.659895,0.883306,0.859506,0.97633,02:37
2,0.64442,0.890825,0.850344,0.980911,02:37
3,0.639653,0.889162,0.851871,0.976839,02:37
4,0.636691,0.873973,0.859506,0.978112,02:37
5,0.646427,0.874967,0.86027,0.975057,02:37
6,0.623019,0.871699,0.855943,0.977602,02:37
7,0.618884,0.850166,0.864851,0.980911,02:38
8,0.618632,0.862228,0.869432,0.978112,02:38
9,0.617541,0.883283,0.856452,0.977602,02:37




Buffered data was truncated after reaching the output size limit.

In [89]:
# Print validation loss and accuracy for every epoch the recorder holds,
# instead of a hard-coded count that must be kept in sync with the fit call.
for i, (val_loss, metrics) in enumerate(zip(learn2.recorder.val_losses, learn2.recorder.metrics)):
  print(i, val_loss.item(), metrics[0].item())

0 0.9009769558906555 0.8472893834114075
1 0.8833058476448059 0.8595062494277954
2 0.8908252120018005 0.8503435850143433
3 0.8891615271568298 0.8518707156181335
4 0.8739725947380066 0.8595062494277954
5 0.8749666810035706 0.8602697849273682
6 0.8716986179351807 0.855942964553833
7 0.8501657247543335 0.8648511171340942
8 0.8622281551361084 0.8694324493408203
9 0.8832833766937256 0.8564520478248596
10 0.8733472228050232 0.8645966053009033
11 0.8668575286865234 0.8574700951576233
12 0.8748914003372192 0.8617969155311584
13 0.8977355360984802 0.8556884527206421
14 0.8786979913711548 0.8605242967605591
15 0.877534806728363 0.8567065596580505
16 0.8835002183914185 0.8589972257614136
17 0.88120436668396 0.8623059391975403
18 0.8919264078140259 0.8546704053878784
19 0.8665375113487244 0.8597607612609863
20 0.8747259378433228 0.8567065596580505
21 0.8497205376625061 0.8610333204269409
22 0.8672081828117371 0.8635784983634949
23 0.8595353364944458 0.868159830570221
24 0.8896135091781616 0.8495800

# results

## size=128


### epochs=5, 5 runs

no stride, bs=16, both act and bn

In [None]:
# Five 5-epoch runs at 128px; print each final accuracy and their mean.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [5]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.853887,1.693871,0.475185,0.900229,03:58
1,1.597855,1.421064,0.609824,0.941715,03:58
2,1.399992,1.231845,0.69916,0.966149,03:58
3,1.232522,1.124027,0.758463,0.966404,03:58
4,1.014952,1.00175,0.81522,0.981675,03:59




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.82989,1.758396,0.432426,0.883431,03:58
1,1.528203,1.49581,0.57012,0.933571,03:58
2,1.343309,1.279568,0.694833,0.963095,03:58
3,1.255255,1.147663,0.739374,0.970221,03:59
4,1.041715,1.004069,0.814966,0.983202,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.86097,1.700778,0.465258,0.904556,03:58
1,1.538369,1.375021,0.638585,0.94477,03:59
2,1.367675,1.224978,0.705014,0.961568,03:59
3,1.180613,1.094188,0.775261,0.971749,03:58
4,0.987883,0.977362,0.826419,0.98142,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.841401,1.678582,0.477475,0.906847,03:58
1,1.492334,1.419619,0.627641,0.952914,03:59
2,1.348735,1.279629,0.688216,0.963604,03:58
3,1.239337,1.132548,0.759735,0.972258,03:59
4,1.00099,0.989038,0.825146,0.981675,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.778878,1.642192,0.494273,0.912955,03:59
1,1.534141,1.428439,0.608552,0.942734,03:58
2,1.349526,1.314455,0.668872,0.956477,03:58
3,1.221146,1.120293,0.757699,0.973021,03:58
4,1.0059,0.979379,0.824128,0.980657,03:59


[0.81522, 0.814966, 0.826419, 0.825146, 0.824128] 0.8211758852005004


### epochs=20, 5 runs

no stride, bs=16, both act and bn

In [None]:
# Five 20-epoch runs at 128px; print each final accuracy and their mean.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [20]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.801173,1.698625,0.481547,0.901502,03:58
1,1.522946,1.387382,0.615933,0.950369,03:58
2,1.317431,1.288623,0.673963,0.955714,03:58
3,1.227739,1.129893,0.753881,0.970221,03:59
4,1.124146,1.070666,0.784424,0.97633,03:59
5,1.114767,1.045305,0.792823,0.975566,03:59
6,1.050665,1.001708,0.811402,0.976584,03:59
7,0.972466,0.985769,0.814966,0.983711,03:59
8,0.937151,0.992524,0.819292,0.980402,03:59
9,0.907048,0.947411,0.835073,0.980148,03:59




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.835402,1.751453,0.438534,0.890303,03:59
1,1.525845,1.374969,0.634004,0.949606,03:59
2,1.332278,1.198569,0.716213,0.966404,03:59
3,1.251647,1.178239,0.726903,0.965386,03:59
4,1.116337,1.071907,0.775515,0.972512,03:59
5,1.115612,1.07113,0.774243,0.972512,03:59
6,1.070621,1.021311,0.797149,0.980148,03:59
7,0.977338,0.975542,0.81802,0.978621,03:59
8,0.939719,0.970199,0.813693,0.977602,03:59
9,0.92381,0.915703,0.847035,0.984474,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.896753,1.734423,0.455078,0.906847,04:00
1,1.556807,1.403181,0.618223,0.946042,03:59
2,1.355759,1.273733,0.685925,0.955714,04:00
3,1.304199,1.166618,0.747518,0.966658,04:00
4,1.133599,1.094989,0.770171,0.97073,04:00
5,1.06229,1.069088,0.787732,0.969458,04:00
6,1.064758,1.01365,0.802494,0.979639,04:00
7,0.98438,0.957368,0.833291,0.982693,04:00
8,0.946529,0.996385,0.807076,0.975312,04:00
9,0.933666,0.947251,0.823619,0.980911,04:00


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.847284,1.775889,0.440825,0.892848,04:00
1,1.598621,1.442757,0.600153,0.949096,04:00
2,1.376522,1.265322,0.691779,0.956986,04:00
3,1.241434,1.172649,0.734793,0.962586,04:00
4,1.163452,1.083677,0.776533,0.975566,04:00
5,1.123032,1.051371,0.790023,0.975057,04:00
6,1.039444,1.001302,0.803767,0.97913,04:00
7,1.000067,0.971766,0.819292,0.982184,04:00
8,0.963111,1.012285,0.798167,0.975057,03:59
9,0.957597,0.925587,0.838381,0.982693,03:59


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.863371,1.890844,0.407228,0.878595,03:59
1,1.587108,1.468102,0.586663,0.942988,04:00
2,1.39958,1.308397,0.661491,0.957496,04:00
3,1.273798,1.182499,0.730466,0.966149,03:59
4,1.181075,1.14516,0.740392,0.969712,03:59
5,1.089572,1.120198,0.752609,0.973021,04:00
6,1.049326,1.013096,0.798422,0.979893,03:59
7,1.028194,1.039928,0.796131,0.974548,03:59
8,0.965961,0.990829,0.81242,0.976075,03:59
9,0.950209,0.971756,0.817765,0.980657,04:00


[0.890303, 0.88903, 0.890812, 0.890048, 0.886231] 0.8892848134040833


### epochs=80, 1 run

no stride, bs=16, no act, no bn

In [None]:
# One 80-epoch run at 128px; print its final accuracy.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.038643,1.808398,0.406974,0.869687,03:37
1,1.809108,1.66809,0.477984,0.896157,03:37
2,1.644071,1.380295,0.630695,0.947315,03:36
3,1.501918,1.250324,0.687961,0.955714,03:36
4,1.419764,1.168158,0.727157,0.965386,03:36
5,1.39219,1.110393,0.756936,0.974294,03:36
6,1.315425,1.051459,0.786969,0.975312,03:36
7,1.300064,1.040518,0.793841,0.974294,03:36
8,1.273304,0.996594,0.80733,0.979384,03:36
9,1.212061,0.999831,0.796386,0.97633,03:36


[0.901502] 0.9015016555786133



no stride, bs=16, both act and bn

In [None]:
# One 80-epoch run at 128px (variant with both activation and batch norm, per the heading).
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=128, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.009993,1.844606,0.379486,0.869432,03:59
1,1.781528,1.53131,0.555358,0.932807,03:58
2,1.613264,1.351579,0.640621,0.952405,03:59
3,1.511871,1.250022,0.708323,0.963095,03:59
4,1.425421,1.116004,0.758463,0.969458,03:59
5,1.373275,1.092476,0.76788,0.97073,03:58
6,1.300701,1.038407,0.790023,0.975312,03:58
7,1.282876,1.04697,0.786969,0.97633,03:58
8,1.268418,0.952731,0.826164,0.981675,03:58
9,1.188836,0.957474,0.81802,0.982438,03:58


[0.902011] 0.9020106792449951


## size=192

experimenting with channel multiplier

In [10]:
# One 80-epoch run at 192px, used to compare channel multipliers.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

Downloading https://s3.amazonaws.com/fast-ai-imageclas/imagewoof2.tgz


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.02511,1.872146,0.425554,0.871978,03:22
1,1.808593,1.560736,0.539577,0.931026,03:23
2,1.64565,1.374737,0.63324,0.948842,03:24
3,1.537533,1.259173,0.687452,0.959786,03:24
4,1.4797,1.155471,0.737338,0.969712,03:24
5,1.411763,1.107171,0.767625,0.968949,03:24
6,1.377335,1.080276,0.770425,0.969458,03:24
7,1.304939,1.025715,0.804021,0.97353,03:24
8,1.264274,0.998292,0.799949,0.977857,03:24
9,1.258094,0.983434,0.81013,0.981166,03:24


	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)


[0.907865] 0.9078645706176758


In [14]:
# Display the accuracies currently held in `res`.
res

[0.8961567878723145]



### epochs=5, 5 runs

no stride, bs=16, act=False, bn=False

In [None]:
# Five 5-epoch runs at 192px; print each final accuracy and their mean.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [5]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.856656,1.887479,0.400611,0.835836,07:11
1,1.61611,1.414912,0.620005,0.937898,07:11
2,1.392715,1.29101,0.676508,0.959786,07:11
3,1.269173,1.158866,0.738356,0.969203,07:11
4,1.050011,1.002696,0.822601,0.982438,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.844413,1.741399,0.445915,0.89692,07:11
1,1.561381,1.435193,0.601425,0.94197,07:11
2,1.398485,1.278045,0.679053,0.960041,07:11
3,1.272492,1.132732,0.754136,0.971494,07:11
4,1.058501,1.011058,0.813184,0.979384,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.845423,1.730929,0.465258,0.905319,07:11
1,1.53599,1.419719,0.618223,0.943497,07:10
2,1.377856,1.211514,0.715449,0.961059,07:11
3,1.239511,1.131984,0.749046,0.972003,07:11
4,1.064996,1.000053,0.813948,0.983202,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.850057,1.804008,0.418682,0.88674,07:10
1,1.552947,1.44266,0.597098,0.942479,07:11
2,1.384726,1.285309,0.679053,0.959023,07:10
3,1.249545,1.141736,0.743701,0.972258,07:11
4,1.064263,1.000224,0.822856,0.975566,07:11




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.851692,1.754275,0.448206,0.880122,07:10
1,1.57925,1.476122,0.5803,0.939679,07:11
2,1.406376,1.354866,0.640366,0.956477,07:11
3,1.256694,1.167863,0.746246,0.968694,07:11
4,1.02072,1.003255,0.817002,0.98142,07:11




[0.822601, 0.813184, 0.813948, 0.822856, 0.817002] 0.8179180502891541


no stride, bs=16, act=True, bn=False

In [None]:
# Five 5-epoch runs at 192px (variant with act=True, bn=False, per the heading).
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [5]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.816367,1.742989,0.460932,0.905574,07:18
1,1.570518,1.470981,0.579537,0.946806,07:19
2,1.335072,1.309832,0.661237,0.957496,07:18
3,1.251946,1.125895,0.758972,0.973021,07:18
4,1.014469,0.983681,0.824637,0.981675,07:18




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.812879,1.758666,0.453041,0.890812,07:18
1,1.553075,1.417638,0.613642,0.945788,07:18
2,1.365519,1.28096,0.685671,0.955205,07:18
3,1.200144,1.240493,0.729702,0.970476,07:18
4,1.026837,0.98004,0.829473,0.981166,07:19




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.844059,1.720307,0.466785,0.901502,07:18
1,1.598753,1.470148,0.602698,0.942479,07:18
2,1.341149,1.255469,0.686689,0.963095,07:18
3,1.243953,1.122435,0.754645,0.969203,07:18
4,1.031597,0.99657,0.824637,0.98142,07:18




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.82138,1.678589,0.481802,0.910919,07:18
1,1.630918,1.482963,0.576228,0.937643,07:18
2,1.379747,1.222471,0.704505,0.96844,07:18
3,1.262058,1.147718,0.755663,0.968949,07:18
4,1.063779,0.986779,0.833291,0.982947,07:19




data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.891106,1.707774,0.461441,0.902774,07:18
1,1.606799,1.477212,0.581064,0.935353,07:18
2,1.420447,1.298537,0.672436,0.959786,07:18
3,1.245861,1.184937,0.736829,0.969712,07:19
4,1.032215,0.994109,0.822601,0.978621,07:18




[0.824637, 0.829473, 0.824637, 0.833291, 0.822601] 0.8269279718399047


### epochs=20, 5 runs

no stride, bs=16, no act no bn

In [None]:
# Five 20-epoch runs at 192px; print each final accuracy and their mean.
# NOTE(review): the heading above says bs=16 but this cell uses bs=32 - confirm which is intended.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [20]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=192, bs=32, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

### epochs=80, 1 run

no stride, bs=16, no act no bn

In [None]:
# One 80-epoch run at 192px; print its final accuracy.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

Downloading https://s3.amazonaws.com/fast-ai-imageclas/imagewoof2.tgz


data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,2.012678,1.787375,0.416136,0.879104,07:11
1,1.827199,1.515741,0.56732,0.930262,07:11
2,1.661169,1.347738,0.643166,0.953678,07:11
3,1.538006,1.26142,0.697124,0.959023,07:11
4,1.459939,1.129074,0.7493,0.968185,07:12
5,1.386111,1.132712,0.755154,0.96564,07:11
6,1.344452,1.063014,0.784933,0.978112,07:12
7,1.331922,1.03115,0.806312,0.973021,07:12
8,1.276424,0.983552,0.819801,0.977348,07:12
9,1.231587,0.98376,0.818274,0.980657,07:11




Buffered data was truncated after reaching the output size limit.

The training log above was truncated by the notebook's output size limit; see the per-epoch val_loss and accuracy printed below instead. **92.08 is the all-time best that I've seen for Imagewoof2.**

In [None]:
# Print validation loss and accuracy for every epoch the recorder holds,
# instead of a hard-coded count that must be kept in sync with the fit call.
for i, (val_loss, metrics) in enumerate(zip(learn.recorder.val_losses, learn.recorder.metrics)):
  print(i, val_loss.item(), metrics[0].item())

0 1.7873750925064087 0.41613641381263733
1 1.5157413482666016 0.567319929599762
2 1.347738265991211 0.643166184425354
3 1.2614200115203857 0.6971239447593689
4 1.129074215888977 0.7493000626564026
5 1.1327120065689087 0.7551539540290833
6 1.063014268875122 0.784932553768158
7 1.0311501026153564 0.8063120245933533
8 0.9835519194602966 0.8198014497756958
9 0.9837595820426941 0.8182743787765503
10 0.9428547024726868 0.833545446395874
11 0.95480877161026 0.8320183157920837
12 0.9107168316841125 0.8521252274513245
13 0.9013711214065552 0.8516162037849426
14 0.9020619988441467 0.8467803597450256
15 0.8675727844238281 0.8689233660697937
16 0.871545135974884 0.8620514273643494
17 0.897378146648407 0.8500890731811523
18 0.896333634853363 0.8653601408004761
19 0.8782687187194824 0.8620514273643494
20 0.8516641855239868 0.8791040778160095
21 0.8856545686721802 0.8513616919517517
22 0.8662623763084412 0.8722321391105652
23 0.8581082820892334 0.8732501864433289
24 0.8560348153114319 0.8735046982765

### epochs=200, 1 run

no stride at stem, bs=16, no act no bn

In [None]:
# One 200-epoch run at 192px; `res` is cleared first so only this run is reported.
res = []
for epochs in [200]:
    if epochs <= 20:
        mixup = 0
    else:
        mixup = 0.2
    learn = get_learn(model=model, size=192, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95, 0.95), start_pct=0.72)
    # Accuracy (first recorded metric) of the last epoch.
    res.append(learn.recorder.metrics[-1][0].item())
print([round(x, 6) for x in res], sum(res)/len(res))

data path   /root/.fastai/data/imagewoof2




Learn path /root/.fastai/data/imagewoof2


epoch,train_loss,valid_loss,accuracy,top_k_accuracy,time
0,1.991029,1.782373,0.423008,0.88903,07:12
1,1.820094,1.558511,0.531687,0.922372,07:11
2,1.640254,1.417206,0.610588,0.951642,07:11
3,1.524751,1.266035,0.692797,0.962077,07:11
4,1.451839,1.167214,0.736574,0.96844,07:12
5,1.363214,1.107141,0.767625,0.972767,07:12
6,1.331252,1.045224,0.792059,0.97353,07:12
7,1.325143,1.032004,0.803003,0.975821,07:12
8,1.2713,0.982155,0.821838,0.983965,07:12
9,1.266131,0.979309,0.827182,0.978875,07:12




In [None]:
# Display the accuracies currently held in `res`.
res

In [None]:
# Print validation loss and accuracy for every recorded epoch.  The previous
# `range(80)` was carried over from the 80-epoch cell and would show only the
# first 80 of this section's 200 epochs.
for i, (val_loss, metrics) in enumerate(zip(learn.recorder.val_losses, learn.recorder.metrics)):
  print(i, val_loss.item(), metrics[0].item())

## size=256

In [None]:
# Five 5-epoch runs at 256px; print each final accuracy and their mean.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [5]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# Five 20-epoch runs at 256px; print each final accuracy and their mean.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [20]*5:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))

In [None]:
# One 80-epoch run at 256px; print its final accuracy.
res = []  # start fresh so results left by earlier cells cannot skew the mean
for epochs in [80]:
    mixup = 0 if epochs <= 20 else 0.2  # mixup only for runs longer than 20 epochs
    learn = get_learn(model=model, size=256, bs=16, mixup=mixup)
    learn.fit_fc(epochs, lr=4e-3, moms=(0.95,0.95), start_pct=0.72)
    res.append(learn.recorder.metrics[-1][0].item())  # accuracy of the last epoch
print([round(x, 6) for x in res], sum(res)/len(res))