In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.script import *
from fastai.vision import *
from fastai.callbacks import *
from fastai.distributed import *
from fastprogress import fastprogress
from fastai import datasets

import torch
import torchvision
import torchvision.transforms as transforms

defaults.cmap = "binary"

torch.backends.cudnn.benchmark = True
fastprogress.MAX_COLS = 80

size = 128 # from https://github.com/fastai/fastai/blob/master/examples/train_imagenette.py#L29
bs = 32

In [2]:
path = untar_data(URLs.IMAGENETTE_160)
tfms = get_transforms(do_flip=True)

In [3]:
data = (ImageList.from_folder(path).split_by_folder(valid='val')
            .label_from_folder().transform(size=size)
            .databunch(bs=bs)
.normalize(imagenet_stats))

In [4]:
# data.show_batch(rows=3)

In [5]:
# Basic ResNet from torchvision

from torchvision.models import ResNet
from torchvision.models.resnet import conv1x1, conv3x3, BasicBlock, Bottleneck

# ResNet boilerplate

# def conv1x1(in_planes, out_planes, stride=1):
#     """1x1 convolution"""
#     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

In [6]:
# ResNet with ReLUs replaced with ELUs

class ELU(nn.Module):
    def __init__(self, alpha=1, inplace=False):
        super(ELU, self).__init__()
        self.alpha = alpha
        self.inplace = inplace

    def forward(self, input):
        if self.inplace:
            result = torch._C._nn.elu_(input, self.alpha)
        else:
            result = torch._C._nn.elu(input, self.alpha)
            
        return result

    def extra_repr(self):
        inplace_str = ', inplace' if self.inplace else ''
        return 'alpha={}{}'.format(self.alpha, inplace_str)

class ELUBottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(ELUBottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = ELU(alpha=1, inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class ELUBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(ELUBasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = ELU(alpha=1, inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class ELUResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ELUResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = ELU(alpha=1, inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [7]:
# Model Factories
def base_rn18(pretrained=False, **kwargs):
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    return model
def base_rn101(pretrained=False, **kwargs):
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    return model

def elu_rn18(pretrained=False, **kwargs):
    model = ELUResNet(ELUBasicBlock, [2, 2, 2, 2], **kwargs)
    return model
def elu_rn101(pretrained=False, **kwargs):
    model = ELUResNet(ELUBottleneck, [3, 4, 23, 3], **kwargs)
    return model

# Stats Accumulator
def average_perf(n, model_creator):
    """
    Build n custom learners from scratch and find average accuracy
    """
    acc_list = []
    
    while n > 0:
        custom_learn = cnn_learner(data, model_creator, metrics=accuracy)
        custom_learn.fit_one_cycle(5, 1e-2)
        acc_list.append(custom_learn.recorder.metrics[-1][0].item())
        n-=1
    return acc_list

## ResNet 18

In [13]:
acc_list = average_perf(5, base_rn18)
from statistics import mean
mean(acc_list)

epoch,train_loss,valid_loss,accuracy,time
0,1.993207,1.773090,0.416000,00:09
1,1.616346,1.408859,0.540000,00:09
2,1.434437,1.228347,0.594000,00:09
3,1.277914,1.130187,0.642000,00:08
4,1.146013,1.087515,0.662000,00:08


epoch,train_loss,valid_loss,accuracy,time
0,2.028860,1.774787,0.402000,00:08
1,1.705326,1.424648,0.516000,00:08
2,1.464102,1.249780,0.612000,00:08
3,1.245688,1.091890,0.642000,00:08
4,1.141047,1.093695,0.638000,00:08


epoch,train_loss,valid_loss,accuracy,time
0,1.988783,1.777709,0.396000,00:08
1,1.654843,1.516314,0.492000,00:08
2,1.364176,1.192454,0.602000,00:10
3,1.217043,1.089286,0.628000,00:08
4,1.080324,1.069616,0.664000,00:08


epoch,train_loss,valid_loss,accuracy,time
0,2.022770,1.806046,0.392000,00:09
1,1.713160,1.401669,0.528000,00:08
2,1.463466,1.288973,0.560000,00:09
3,1.301730,1.101513,0.632000,00:10
4,1.110465,1.076316,0.644000,00:09


epoch,train_loss,valid_loss,accuracy,time
0,1.991029,1.747223,0.432000,00:09
1,1.604519,1.381188,0.532000,00:09
2,1.376892,1.128871,0.626000,00:08
3,1.214790,1.063990,0.668000,00:09
4,1.046715,1.023861,0.672000,00:09


0.6559999942779541

## ELU ResNet 18

In [14]:
acc_list = average_perf(5, elu_rn18)
from statistics import mean
mean(acc_list)

epoch,train_loss,valid_loss,accuracy,time
0,1.753064,1.485148,0.490000,00:08
1,1.427631,1.186490,0.610000,00:08
2,1.252711,1.076282,0.638000,00:08
3,1.070924,0.930918,0.706000,00:09
4,0.947818,0.891051,0.720000,00:08


epoch,train_loss,valid_loss,accuracy,time
0,1.721880,1.420173,0.540000,00:08
1,1.446061,1.347655,0.548000,00:08
2,1.252090,1.015776,0.674000,00:08
3,1.046147,0.881330,0.706000,00:09
4,0.922166,0.854301,0.716000,00:08


epoch,train_loss,valid_loss,accuracy,time
0,1.776605,1.502506,0.476000,00:09
1,1.435791,1.250680,0.574000,00:09
2,1.298466,1.155733,0.608000,00:09
3,1.065737,0.969847,0.666000,00:09
4,0.943262,0.953246,0.672000,00:08


epoch,train_loss,valid_loss,accuracy,time
0,1.775530,1.515474,0.510000,00:08
1,1.448253,1.479407,0.496000,00:09
2,1.310687,1.088151,0.650000,00:09
3,1.069814,0.975970,0.692000,00:09
4,0.935649,0.933849,0.692000,00:09


epoch,train_loss,valid_loss,accuracy,time
0,1.751293,1.427981,0.534000,00:08
1,1.457620,1.240726,0.592000,00:08
2,1.278188,1.002889,0.684000,00:08
3,1.041469,0.923149,0.694000,00:08
4,0.931011,0.886781,0.712000,00:08


0.702400004863739

## ResNet 101

In [15]:
acc_list = average_perf(5, base_rn101)
from statistics import mean
mean(acc_list)

epoch,train_loss,valid_loss,accuracy,time
0,2.428282,2.369799,0.212000,00:27
1,2.185912,2.075623,0.252000,00:26
2,2.108149,2.042412,0.268000,00:26
3,2.013483,1.920211,0.334000,00:26
4,1.973544,1.892210,0.332000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,2.479833,2.359621,0.126000,00:26
1,2.202357,2.150367,0.226000,00:26
2,2.088353,2.019446,0.270000,00:26
3,1.982446,2.446059,0.344000,00:26
4,1.920187,1.830943,0.382000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,2.389247,2.371348,0.184000,00:26
1,2.174075,2.068883,0.250000,00:26
2,2.029560,1.988616,0.274000,00:26
3,1.965583,1.889348,0.334000,00:26
4,1.938990,1.918869,0.326000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,2.386970,2.335135,0.180000,00:26
1,2.193945,2.289963,0.200000,00:26
2,2.121176,6.202655,0.284000,00:26
3,2.003940,7.062656,0.332000,00:26
4,1.978103,7.857753,0.330000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,2.406770,2.529673,0.152000,00:26
1,2.188746,2.143609,0.226000,00:26
2,2.054996,1.948175,0.322000,00:27
3,1.971834,1.868524,0.354000,00:26
4,1.908179,1.806447,0.380000,00:26


0.35

## ELU ResNet 101

In [16]:
acc_list = average_perf(5, elu_rn101)
from statistics import mean
mean(acc_list)

epoch,train_loss,valid_loss,accuracy,time
0,1.821190,1.659477,0.460000,00:26
1,1.391616,1.242952,0.592000,00:26
2,1.188016,1.060732,0.656000,00:26
3,0.892670,0.894200,0.692000,00:26
4,0.571411,0.887915,0.732000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,1.821844,1.627754,0.462000,00:26
1,1.447161,1.233271,0.580000,00:26
2,1.229707,1.007942,0.668000,00:26
3,0.953061,0.874154,0.716000,00:26
4,0.643270,0.898632,0.724000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,1.807091,1.515607,0.492000,00:26
1,1.421114,1.218070,0.594000,00:28
2,1.207544,1.071286,0.664000,00:27
3,0.924212,0.872984,0.726000,00:26
4,0.598121,0.849575,0.754000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,1.811163,1.515720,0.482000,00:26
1,1.371264,1.306995,0.550000,00:26
2,1.197574,1.077598,0.642000,00:26
3,0.897983,0.915420,0.682000,00:26
4,0.674504,0.912590,0.712000,00:26


epoch,train_loss,valid_loss,accuracy,time
0,1.801394,1.462869,0.510000,00:26
1,1.384144,1.308211,0.580000,00:26
2,1.159598,0.968870,0.700000,00:26
3,0.903109,0.903474,0.720000,00:26
4,0.616099,0.873004,0.720000,00:26


0.7284000039100647