# EfficientNet Lite in PyTorch

In [1]:
import torch
from torch import nn, optim
import math
import os
from torchinfo import summary

In [2]:
def conv_block(in_channels, out_channels, kernel_size=3,
               stride=1, padding=0, groups=1,
               bias=False, bn=True, act=True):
    """
    Build a Conv2d -> BatchNorm2d -> ReLU6 unit as an ``nn.Sequential``.

    The returned container always has three entries; when ``bn`` or ``act``
    is false the corresponding slot is filled with ``nn.Identity`` so the
    module indices (and therefore the state_dict keys) do not shift.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels of the convolution.
        kernel_size, stride, padding, groups, bias: forwarded to ``nn.Conv2d``.
        bn: whether to apply batch normalisation after the convolution.
        act: whether to apply ReLU6 after the normalisation.
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride,
                     padding=padding, groups=groups, bias=bias)

    if bn:
        norm = nn.BatchNorm2d(out_channels)
    else:
        norm = nn.Identity()

    if act:
        activation = nn.ReLU6()
    else:
        activation = nn.Identity()

    return nn.Sequential(conv, norm, activation)

In [3]:
class MBConv(nn.Module):
    """
    Inverted-residual (MBConv) block, after the MobileNet paper.

    Pipeline: optional 1x1 expansion -> depthwise conv -> 1x1 projection
    (no activation). When the input and output shapes agree (same channel
    count, stride 1) the projected output goes through dropout and is added
    to the block input.

    Args:
        n_in: input channels.
        n_out: output channels.
        expansion: multiplier for the hidden width; 1 skips the expansion conv.
        kernel_size: depthwise kernel size.
        stride: depthwise stride.
        dropout: dropout probability applied to the residual branch.
    """
    def __init__(self, n_in, n_out, expansion, kernel_size=3, stride=1, dropout=0.1):
        super().__init__()
        hidden = expansion * n_in
        # Half the kernel (rounded down) keeps the spatial size at stride 1 for odd kernels.
        pad = (kernel_size - 1) // 2
        # A shortcut is only possible when the tensor shape is unchanged by the block.
        self.skip_connection = (stride == 1) and (n_in == n_out)

        if expansion == 1:
            self.expand_pw = nn.Identity()
        else:
            self.expand_pw = conv_block(n_in, hidden, kernel_size=1)
        # groups == channels makes this a depthwise convolution.
        self.depthwise = conv_block(hidden, hidden, kernel_size=kernel_size,
                                    stride=stride, padding=pad, groups=hidden)
        # Linear bottleneck: projection without an activation.
        self.reduce_pw = conv_block(hidden, n_out, kernel_size=1, act=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = self.reduce_pw(self.depthwise(self.expand_pw(x)))
        if not self.skip_connection:
            return out
        return self.dropout(out) + x

In [4]:
def mbconv1(n_in, n_out, kernel_size=3, stride=1, dropout=0.2):
    """Return an ``MBConv`` block with expansion factor 1 (no expansion conv)."""
    return MBConv(
        n_in,
        n_out,
        expansion=1,
        kernel_size=kernel_size,
        stride=stride,
        dropout=dropout,
    )

In [5]:
def mbconv6(n_in, n_out, kernel_size=3, stride=1, r=24, dropout=0.2):
    """
    Return an ``MBConv`` block with expansion factor 6.

    ``r`` is accepted but not used by this function; it is kept so existing
    callers that pass it keep working.
    """
    return MBConv(
        n_in,
        n_out,
        expansion=6,
        kernel_size=kernel_size,
        stride=stride,
        dropout=dropout,
    )

In [6]:
def create_stage(n_in, n_out, num_layers, layer=mbconv6,
                 kernel_size=3, stride=1, dropout=0.2):
    """
    Assemble one EfficientNet stage as an ``nn.Sequential`` of blocks.

    The first block maps ``n_in`` -> ``n_out`` channels and carries the
    stage's ``stride``; the remaining ``num_layers - 1`` blocks keep
    ``n_out`` channels and use the block factory's default stride.

    Args:
        n_in: channels entering the stage.
        n_out: channels produced by every block of the stage.
        num_layers: total number of blocks (at least one is always created).
        layer: block factory, called as ``layer(in, out, kernel_size=..., ...)``.
        kernel_size: kernel size passed to every block.
        stride: stride passed to the first block only.
        dropout: dropout probability passed to every block.
    """
    blocks = [layer(n_in, n_out, kernel_size=kernel_size,
                    stride=stride, dropout=dropout)]
    for _ in range(num_layers - 1):
        blocks.append(layer(n_out, n_out, kernel_size=kernel_size,
                            dropout=dropout))
    return nn.Sequential(*blocks)

EfficientNet Base structure

| Stage (i) | Layer     | Resolution | Channels | Layers |
|-----------|-----------|------------|----------|--------|
| 1         | `conv3x3` (stem) | 224 x 224  | 32       | 1      |
| 2         | `mbconv1` | 112 x 112  | 16       | 1      |
| 3         | `mbconv6` | 112 x 112  | 24       | 2      |
| 4         | `mbconv6` | 56 x 56    | 40       | 2      |
| 5         | `mbconv6` | 28 x 28    | 80       | 3      |
| 6         | `mbconv6` | 14 x 14    | 112      | 3      |
| 7         | `mbconv6` | 14 x 14    | 192      | 4      |
| 8         | `mbconv6` | 7 x 7      | 320      | 1      |
| 9         | `conv1x1` + pooling + linear | 7 x 7      | 1280     | 1      |

In [7]:
### Obtained from Paper ###
# Base channel counts: stem conv output, the outputs of the seven MBConv
# stages, and finally the output of the 1x1 conv that precedes pooling.
widths = [32, 16, 24, 40, 80, 112, 192, 320, 1280]
# Per-stage settings for the seven MBConv stages (index i describes stage i):
# number of blocks, depthwise kernel size, and stride of the stage's first block.
depths = [1, 2, 2, 3, 3, 4, 1]
kernel_sizes = [3, 3, 5, 3, 5, 5, 3]
strides = [1, 2, 2, 2, 1, 2, 1]

In [8]:
def scale_width(w, w_factor):
    """
    Scale a channel count by ``w_factor`` and snap it to a multiple of 8.

    The scaled value is rounded to the nearest multiple of 8 (never below 8).
    If that rounding lost more than 10% of the scaled width, one extra step
    of 8 is added.

    Args:
        w: base channel count.
        w_factor: width multiplier.

    Returns:
        The adjusted channel count as an ``int``.
    """
    target = w * w_factor
    snapped = max(8, 8 * (int(target + 4) // 8))
    if snapped < 0.9 * target:
        snapped += 8
    return int(snapped)

In [9]:
def efficientnet_scaler(w_factor=1, d_factor=1):
    """
    Apply width/depth scaling to the base configuration.

    Every base width goes through ``scale_width`` and every base depth is
    multiplied by ``d_factor`` and rounded up. The first and last widths are
    then pinned to 32 and 1280, and the first and last depths to 1, so those
    entries are unaffected by the multipliers.

    Returns:
        ``(scaled_widths, scaled_depths)`` as two lists.
    """
    scaled_widths = []
    for base_width in widths:
        scaled_widths.append(scale_width(base_width, w_factor))
    # First and last widths are fixed regardless of w_factor.
    scaled_widths[0], scaled_widths[-1] = 32, 1280

    scaled_depths = []
    for base_depth in depths:
        scaled_depths.append(math.ceil(d_factor*base_depth))
    # First and last stages always keep a single block.
    scaled_depths[0] = 1
    scaled_depths[-1] = 1

    return scaled_widths, scaled_depths

In [10]:
class EfficientNet(nn.Module):
    """
    Generic EfficientNet classifier built from named sub-modules.

    The network is exposed as ``stem``, ``stages``, ``pre``, ``pool_flatten``
    and ``head``, which makes it easy to customise: for example, the
    classification ``head`` can be swapped for something more complex after
    construction.

    Args:
        w_factor: width multiplier forwarded to ``efficientnet_scaler``.
        d_factor: depth multiplier forwarded to ``efficientnet_scaler``.
        n_classes: number of outputs of the final linear layer.
        dropout: dropout probability handed to every block through
            ``create_stage``. Defaults to 0.2, the value that used to be
            hard-coded, and mirrors the ``dropout`` argument of
            ``EfficientNetSequential``.
    """
    def __init__(self, w_factor=1, d_factor=1, n_classes=1000, dropout=0.2):
        super().__init__()
        scaled_widths, scaled_depths = efficientnet_scaler(w_factor=w_factor, d_factor=d_factor)

        # Stem: 3x3 conv with stride 2 on a 3-channel input.
        self.stem = conv_block(3, scaled_widths[0], stride=2, padding=1)
        # One stage per entry in the scaled depth list; only the first stage
        # uses the expansion-1 block.
        stages = [
            create_stage(scaled_widths[i], scaled_widths[i+1], scaled_depths[i],
                         layer=mbconv1 if i == 0 else mbconv6,
                         kernel_size=kernel_sizes[i], stride=strides[i], dropout=dropout)
            for i in range(len(scaled_depths))
        ]
        self.stages = nn.Sequential(*stages)
        # 1x1 conv that widens the features before pooling.
        self.pre = conv_block(scaled_widths[-2], scaled_widths[-1], kernel_size=1)
        self.pool_flatten = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
        self.head = nn.Sequential(
            nn.Linear(scaled_widths[-1], n_classes)
        )

    def forward(self, x):
        """Run stem -> stages -> 1x1 conv -> global pool/flatten -> head."""
        x = self.stem(x)
        x = self.stages(x)
        x = self.pre(x)
        x = self.pool_flatten(x)
        x = self.head(x)
        return x

In [11]:
def EfficientNetSequential(w_factor=1, d_factor=1, n_classes=1000, dropout=0.2):
    """
    Build the same architecture as the ``EfficientNet`` class, but as one flat
    ``nn.Sequential``.

    The sub-modules are only addressable by position (stem, the seven stages,
    the 1x1 conv, pool/flatten, linear head), so replacing a part such as the
    head is less convenient than with the class-based model.

    Args:
        w_factor: width multiplier forwarded to ``efficientnet_scaler``.
        d_factor: depth multiplier forwarded to ``efficientnet_scaler``.
        n_classes: number of outputs of the final linear layer.
        dropout: dropout probability passed to every block.
    """
    scaled_widths, scaled_depths = efficientnet_scaler(w_factor=w_factor, d_factor=d_factor)

    # Stem: 3x3 conv with stride 2 on a 3-channel input.
    modules = [conv_block(3, scaled_widths[0], stride=2, padding=1)]

    for i in range(7):
        # Only the first stage uses the expansion-1 block.
        block_factory = mbconv1 if i == 0 else mbconv6
        modules.append(
            create_stage(scaled_widths[i], scaled_widths[i+1], scaled_depths[i],
                         layer=block_factory, kernel_size=kernel_sizes[i],
                         stride=strides[i], dropout=dropout)
        )

    modules.append(conv_block(scaled_widths[-2], scaled_widths[-1], kernel_size=1))
    modules.append(nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten()))
    modules.append(nn.Sequential(nn.Linear(scaled_widths[-1], n_classes)))
    return nn.Sequential(*modules)

In [12]:
def efficientnet_lite0(n_classes=1000, builder = EfficientNet):
    """Build the lite0 variant: ``builder`` is called with its default width/depth factors."""
    model = builder(n_classes=n_classes)
    return model

In [13]:
def efficientnet_lite1(n_classes=1000, builder = EfficientNet):
    """Build the lite1 variant: width factor 1, depth factor 1.1."""
    width_factor, depth_factor = 1, 1.1
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [14]:
def efficientnet_lite2(n_classes=1000, builder = EfficientNet):
    """Build the lite2 variant: width factor 1.1, depth factor 1.2."""
    width_factor, depth_factor = 1.1, 1.2
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [15]:
def efficientnet_lite3(n_classes=1000, builder = EfficientNet):
    """Build the lite3 variant: width factor 1.2, depth factor 1.4."""
    width_factor, depth_factor = 1.2, 1.4
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [16]:
def efficientnet_lite4(n_classes=1000, builder = EfficientNet):
    """Build the lite4 variant: width factor 1.4, depth factor 1.8."""
    width_factor, depth_factor = 1.4, 1.8
    return builder(width_factor, depth_factor, n_classes=n_classes)

In [28]:
# Square input side length per variant; index i is used with models[i] (lite0..lite4).
img_sizes = [224, 240, 260, 280, 300]

In [29]:
# Instantiate every variant, alternating between the class-based builder
# (default) and the flat nn.Sequential builder so both code paths are exercised.
lite0 = efficientnet_lite0()
lite1 = efficientnet_lite1(builder=EfficientNetSequential)
lite2 = efficientnet_lite2()
lite3 = efficientnet_lite3(builder=EfficientNetSequential)
lite4 = efficientnet_lite4()

In [35]:
# Ordered lite0..lite4 so that models[i] lines up with img_sizes[i].
models = [lite0, lite1, lite2, lite3, lite4]

In [30]:
# Smoke test: push one random 3-channel image of the matching size through a
# model from each builder and show the output shapes.
lite_0_inp = torch.randn(1, 3, img_sizes[0], img_sizes[0])
lite_1_inp = torch.randn(1, 3, img_sizes[1], img_sizes[1])
lite0(lite_0_inp).shape, lite1(lite_1_inp).shape

(torch.Size([1, 1000]), torch.Size([1, 1000]))

In [31]:
def print_size_of_model(model):
    """
    Print the serialized size of ``model.state_dict()`` in MB (1e6 bytes).

    The weights are written with ``torch.save`` into a throw-away directory,
    so an existing ``temp.p`` in the working directory is never overwritten
    or deleted, and the scratch file is removed even if saving or measuring
    raises.

    Args:
        model: any object exposing ``state_dict()`` (e.g. an ``nn.Module``).
    """
    import tempfile  # local import: only this helper needs it

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Same file name as before, just inside a private directory.
        checkpoint_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), checkpoint_path)
        print('Size (MB):', os.path.getsize(checkpoint_path)/1e6)

In [36]:
# Serialized state_dict size of each variant, lite0 -> lite4.
for m in models:
    print_size_of_model(m)

Size (MB): 18.875633
Size (MB): 22.006461
Size (MB): 24.729981
Size (MB): 33.237073
Size (MB): 52.660985


In [37]:
def fmat(n):
    """Format a count as millions with two decimals, e.g. 4650000 -> '4.65M'."""
    millions = n / 1_000_000
    return f"{millions:.2f}M"

In [38]:
def params(model, f=True):
    """
    Count the trainable parameters of ``model``.

    Only parameters with ``requires_grad`` set are counted.

    Args:
        model: module whose ``parameters()`` are inspected.
        f: when truthy, return the count formatted by ``fmat``; otherwise
            return the raw integer.
    """
    total = 0
    for p in model.parameters():
        if p.requires_grad:
            total += p.numel()
    if f:
        return fmat(total)
    return total

In [39]:
# Trainable parameter count (in millions) of each variant, lite0 -> lite4.
for m in models:
    print(params(m))

4.65M
5.42M
6.09M
8.20M
13.01M


In [58]:
# Index into `models` / `img_sizes`; the model is summarised at its own input size.
i = 3
summary(models[i], (1, 3, img_sizes[i], img_sizes[i]), depth=0) # pick a model.

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               --                        --
Total params: 8,197,096
Trainable params: 8,197,096
Non-trainable params: 0
Total mult-adds (G): 1.38
Input size (MB): 0.94
Forward/backward pass size (MB): 303.76
Params size (MB): 32.79
Estimated Total Size (MB): 337.49

In [43]:
import timm

In [46]:
# Reference lite4 implementation from timm (randomly initialised), for comparison.
# NOTE(review): this rebinds `m`, the loop variable of the size/parameter loops above.
m = timm.create_model('tf_efficientnet_lite4', pretrained=False)

In [52]:
# NOTE(review): img_sizes[4-1] is 280, the entry paired with models[3] (lite3),
# while `m` is a lite4 model; img_sizes[4] (300) sits at lite4's index.
# Confirm which resolution is intended before comparing against the summary above.
summary(m, (1, 3, img_sizes[4-1], img_sizes[4-1]), depth=2)

Layer (type:depth-idx)                        Output Shape              Param #
EfficientNet                                  --                        --
├─Conv2dSame: 1-1                             [1, 32, 140, 140]         864
├─BatchNorm2d: 1-2                            [1, 32, 140, 140]         64
├─ReLU6: 1-3                                  [1, 32, 140, 140]         --
├─Sequential: 1-4                             [1, 448, 9, 9]            --
│    └─Sequential: 2-1                        [1, 24, 140, 140]         1,168
│    └─Sequential: 2-2                        [1, 32, 70, 70]           54,544
│    └─Sequential: 2-3                        [1, 56, 35, 35]           165,040
│    └─Sequential: 2-4                        [1, 112, 18, 18]          858,480
│    └─Sequential: 2-5                        [1, 160, 18, 18]          1,879,392
│    └─Sequential: 2-6                        [1, 272, 9, 9]            6,992,864
│    └─Sequential: 2-7                        [1, 448, 9, 9]   