In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.script import *
from fastai.vision import *
from fastai.callbacks import *
from fastai.distributed import *
from fastprogress import fastprogress
from fastai import datasets

import torch
import torchvision
import torchvision.transforms as transforms

defaults.cmap = "binary"

torch.backends.cudnn.benchmark = True
fastprogress.MAX_COLS = 80

size = 128 # from https://github.com/fastai/fastai/blob/master/examples/train_imagenette.py#L29
bs = 32

In [2]:
path = untar_data(URLs.IMAGENETTE_160)
tfms = get_transforms(do_flip=True)

In [3]:
data = (ImageList.from_folder(path).split_by_folder(valid='val')
            .label_from_folder().transform(size=size)
            .databunch(bs=bs)
.normalize(imagenet_stats))

In [4]:
# data.show_batch(rows=3)

In [5]:
# Basic ResNet from torchvision

from torchvision.models import ResNet
from torchvision.models.resnet import conv1x1, conv3x3, BasicBlock, Bottleneck

# ResNet boilerplate

# def conv1x1(in_planes, out_planes, stride=1):
#     """1x1 convolution"""
#     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

In [6]:
# ResNet with ReLUs replaced with ELUs

class ELU(nn.Module):
    def __init__(self, alpha=1, inplace=False):
        super(ELU, self).__init__()
        self.alpha = alpha
        self.inplace = inplace

    def forward(self, input):
        if self.inplace:
            result = torch._C._nn.elu_(input, self.alpha)
        else:
            result = torch._C._nn.elu(input, self.alpha)
            
        return result

    def extra_repr(self):
        inplace_str = ', inplace' if self.inplace else ''
        return 'alpha={}{}'.format(self.alpha, inplace_str)

class ELUBottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(ELUBottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = ELU(alpha=1, inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class NoBN_ELUBottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(NoBN_ELUBottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
#         self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
#         self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
#         self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = ELU(alpha=1, inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
#         out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
#         out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
#         out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
    
class NoBN_BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(NoBN_BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
#         self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
#         self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
#         out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
#         out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

### Bottleneck

In [7]:
class ELUBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(ELUBasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = ELU(alpha=1, inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
    
class NoBN_ELUBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(NoBN_ELUBasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
#         self.bn1 = nn.BatchNorm2d(planes)
        self.relu = ELU(alpha=1, inplace=True)
        self.conv2 = conv3x3(planes, planes)
#         self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
#         out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
#         out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
    
class NoBN_Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(NoBN_Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
#         self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
#         self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, planes * self.expansion)
#         self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
#         out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
#         out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
#         out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

In [8]:
class NoBN_ELUResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(NoBN_ELUResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
#         self.bn1 = nn.BatchNorm2d(64)
        self.relu = ELU(alpha=1, inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
#         if zero_init_residual:
#             for m in self.modules():
#                 if isinstance(m, Bottleneck):
#                     nn.init.constant_(m.bn3.weight, 0)
#                 elif isinstance(m, BasicBlock):
#                     nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
#                 nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
#         x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [9]:
class ELUResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ELUResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = ELU(alpha=1, inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [16]:
class NoBN_ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(NoBN_ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
#         self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
#         if zero_init_residual:
#             for m in self.modules():
#                 if isinstance(m, Bottleneck):
#                     nn.init.constant_(m.bn3.weight, 0)
#                 elif isinstance(m, BasicBlock):
#                     nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
#                 nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
#         x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [19]:
# Model Factories
## Base Model Factories
def base_rn18(pretrained=False, **kwargs):
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def nobn_base_rn18(pretrained=False, **kwargs):
    model = NoBN_ResNet(NoBN_BasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def base_rn101(pretrained=False, **kwargs):
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    return model

def nobn_base_rn101(pretrained=False, **kwargs):
    return NoBN_ResNet(NoBN_Bottleneck, [3, 4, 23, 3], **kwargs)

## ELU RN18 Factories

def elu_rn18(pretrained=False, **kwargs):
    model = ELUResNet(ELUBasicBlock, [2, 2, 2, 2], **kwargs)
    return model

def nobn_elu_rn18(pretrained=False, **kwargs):
    model = NoBN_ELUResNet(NoBN_ELUBasicBlock,[2, 2, 2, 2], **kwargs)
    return model
    
## ELU RN101 factories

def elu_rn101(pretrained=False, **kwargs):
    model = ELUResNet(ELUBottleneck, [3, 4, 23, 3], **kwargs)
    return model

def nobn_elu_rn101(pretrained=False, **kwargs):
    model = NoBN_ELUResNet(NoBN_ELUBottleneck, [3, 4, 23, 3], **kwargs)
    return model

In [12]:
from statistics import mean
def average_perf(n, model_creator):
    """
    Build n custom learners from scratch and find average accuracy
    """
    acc_list = []
    
    for _ in range(n):
        custom_learn = cnn_learner(data, model_creator, metrics=accuracy)
        custom_learn.fit_one_cycle(5, 1e-2)
        acc_list.append(custom_learn.recorder.metrics[-1][0].item())
    print(f"Mean accuracy over {n} runs(s) is {mean(acc_list)}")
    return acc_list

## ResNet 18
### Base

In [13]:
acc_list = average_perf(1, base_rn18)

epoch,train_loss,valid_loss,accuracy,time
0,2.023332,1.825159,0.404,00:14
1,1.625634,1.488958,0.476,00:13
2,1.406511,1.246071,0.562,00:13
3,1.244813,1.121302,0.626,00:13
4,1.124004,1.073009,0.644,00:13


Mean accuracy over 1 runs(s) is 0.6439999938011169


### No Batchnorm

In [20]:
acc_list = average_perf(1, nobn_base_rn18)

epoch,train_loss,valid_loss,accuracy,time
0,2.08181,1.804427,0.374,00:09
1,1.91211,1.737468,0.384,00:09
2,1.834521,1.614318,0.44,00:09
3,1.70652,1.522835,0.496,00:09
4,1.62425,1.488148,0.49,00:09


Mean accuracy over 1 runs(s) is 0.49000000953674316


## ELU ResNet 18

In [21]:
acc_list = average_perf(1, elu_rn18)

epoch,train_loss,valid_loss,accuracy,time
0,1.800465,1.560575,0.472,00:13
1,1.455747,1.183137,0.594,00:13
2,1.260119,1.106642,0.614,00:13
3,1.112375,0.971657,0.686,00:13
4,0.960372,0.950047,0.676,00:13


Mean accuracy over 1 runs(s) is 0.6759999990463257


### No Batchnorm

In [22]:
acc_list = average_perf(1, nobn_elu_rn18)

epoch,train_loss,valid_loss,accuracy,time
0,1.872543,1.583447,0.452,00:09
1,1.777923,1.497934,0.498,00:09
2,1.658929,1.401648,0.52,00:09
3,1.556396,1.327714,0.56,00:09
4,1.482339,1.315337,0.56,00:09


Mean accuracy over 1 runs(s) is 0.5600000023841858


## ResNet 101
### Base

In [23]:
acc_list = average_perf(1, base_rn101)

epoch,train_loss,valid_loss,accuracy,time
0,2.44453,2.417264,0.162,00:58
1,2.181121,2.127283,0.218,00:56
2,2.041199,1.982507,0.334,00:56
3,1.918381,2.399356,0.354,00:56
4,1.886666,2.63155,0.378,00:56


Mean accuracy over 1 runs(s) is 0.3779999911785126


### No Batchnorm

In [24]:
acc_list = average_perf(1, nobn_base_rn101)

epoch,train_loss,valid_loss,accuracy,time
0,2.39708,2.488739,0.186,00:22
1,2.24672,2.155264,0.24,00:22
2,2.131895,163.749313,0.314,00:22
3,2.099322,45.957848,0.328,00:22
4,2.044532,6.685519,0.336,00:22


Mean accuracy over 1 runs(s) is 0.335999995470047


## ELU ResNet 101

In [25]:
acc_list = average_perf(1, elu_rn101)

epoch,train_loss,valid_loss,accuracy,time
0,1.83004,1.568955,0.436,00:56
1,1.401859,1.263604,0.586,00:56
2,1.153251,1.116985,0.628,00:56
3,0.930289,0.865397,0.738,00:56
4,0.596853,1.159572,0.71,00:56


Mean accuracy over 1 runs(s) is 0.7099999785423279


### No BatchNorm

In [26]:
acc_list = average_perf(1, nobn_elu_rn101)

epoch,train_loss,valid_loss,accuracy,time
0,2.415751,2.397038,0.232,00:23
1,2.21175,2.241375,0.236,00:23
2,2.109448,9.244571,0.286,00:23
3,2.070813,653.704224,0.318,00:23
4,2.002895,746.002319,0.34,00:23


Mean accuracy over 1 runs(s) is 0.3400000035762787


In [28]:
print(acc_list)

[0.3400000035762787]
