# MobileNet V2 in PyTorch

Based on the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381.pdf).

In [1]:
import torch
from torch import nn
from torchinfo import summary

In [2]:
def conv_block(in_channels, out_channels, kernel_size=3,
               stride=1, padding=0, groups=1,
               bias=False, bn=True, act=True):
    """
    Build a Conv2d -> (BatchNorm2d) -> (ReLU6) block as an ``nn.Sequential``.

    The returned container always has exactly three entries; a disabled
    normalization or activation is replaced by ``nn.Identity`` so the
    positional indices of the sub-modules stay the same either way.

    Args:
        in_channels: input channels of the convolution.
        out_channels: output channels of the convolution (and of the batch norm).
        kernel_size, stride, padding, groups, bias: forwarded to ``nn.Conv2d``.
        bn: when truthy, slot 1 is ``nn.BatchNorm2d(out_channels)``.
        act: when truthy, slot 2 is ``nn.ReLU6()``.

    Returns:
        ``nn.Sequential(conv, norm_or_identity, activation_or_identity)``.
    """
    conv = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=bias,
    )

    if bn:
        norm = nn.BatchNorm2d(out_channels)
    else:
        norm = nn.Identity()

    if act:
        activation = nn.ReLU6()
    else:
        activation = nn.Identity()

    return nn.Sequential(conv, norm, activation)

In [3]:
class MBConv(nn.Module):
    """
    Inverted-residual block: 1x1 expansion -> depthwise conv -> 1x1 projection.

    Args:
        n_in: input channels.
        n_out: output channels.
        expansion: multiplier applied to ``n_in`` to get the hidden width.
            When it equals 1 the expansion conv is replaced by ``nn.Identity``.
        kernel_size: kernel size of the depthwise convolution; its padding is
            ``(kernel_size - 1) // 2``.
        stride: stride of the depthwise convolution.
        dropout: probability for the ``nn.Dropout`` applied to the block output
            before it is added to the input (residual path only).

    The residual connection is used only when ``n_in == n_out`` and
    ``stride == 1``; otherwise the projected output is returned as is.
    """

    def __init__(self, n_in, n_out, expansion, kernel_size=3, stride=1, dropout=0.1):
        super().__init__()
        hidden = expansion * n_in
        pad = (kernel_size - 1) // 2
        self.skip_connection = (n_in == n_out) and (stride == 1)

        # Pointwise expansion (skipped entirely for expansion == 1).
        if expansion == 1:
            self.expand_pw = nn.Identity()
        else:
            self.expand_pw = conv_block(n_in, hidden, kernel_size=1)

        # Depthwise conv: one group per channel.
        self.depthwise = conv_block(
            hidden,
            hidden,
            kernel_size=kernel_size,
            stride=stride,
            padding=pad,
            groups=hidden,
        )

        # Pointwise projection back to n_out, without an activation.
        self.reduce_pw = conv_block(hidden, n_out, kernel_size=1, act=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        out = self.reduce_pw(self.depthwise(self.expand_pw(x)))
        if not self.skip_connection:
            return out
        # Residual path: drop out the branch output, then add the block input.
        return self.dropout(out) + x

In [4]:
def mbconv1(n_in, n_out, kernel_size=3, stride=1, dropout=0.1):
    """Return an ``MBConv`` block with expansion factor 1."""
    block_kwargs = dict(kernel_size=kernel_size, stride=stride, dropout=dropout)
    return MBConv(n_in, n_out, 1, **block_kwargs)

In [5]:
def mbconv6(n_in, n_out, kernel_size=3, stride=1, dropout=0.1):
    """Return an ``MBConv`` block with expansion factor 6."""
    block_kwargs = dict(kernel_size=kernel_size, stride=stride, dropout=dropout)
    return MBConv(n_in, n_out, 6, **block_kwargs)

In [6]:
def create_stage(n_in, n_out, num_layers, layer=mbconv6,
                 kernel_size=3, stride=1, ps=0):
    """
    Build one stage of the network: a sequence of blocks made by ``layer``.

    Args:
        n_in: input channels of the first block.
        n_out: output channels of every block (and input of all but the first).
        num_layers: total number of blocks; the first block is always created,
            followed by ``num_layers - 1`` more.
        layer: block factory called as
            ``layer(n_in, n_out, kernel_size=..., stride=..., dropout=...)``.
        kernel_size: forwarded to every block.
        stride: forwarded to the first block only; later blocks use the
            factory's own default stride.
        ps: dropout probability forwarded to every block.

    Returns:
        ``nn.Sequential`` holding the blocks in order.
    """
    # Only the first block changes the channel count and receives the stride.
    blocks = [layer(n_in, n_out, kernel_size=kernel_size, stride=stride, dropout=ps)]
    for _ in range(num_layers - 1):
        blocks.append(layer(n_out, n_out, kernel_size=kernel_size, dropout=ps))
    return nn.Sequential(*blocks)

In [7]:
### Obtained from Paper ###
# Per-stage configuration tables read by MobileNetV2 below.
# widths: channel counts. widths[0] is the stem output, stage i maps
#         widths[i] -> widths[i+1], and widths[-1] is the output width of the
#         final 1x1 conv that feeds the classifier head.
widths = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
# depths: number of blocks in each of the 7 stages.
depths = [1, 2, 3, 4, 3, 3, 1]
# strides: stride given to the first block of each stage.
strides = [1, 2, 2, 2, 1, 2, 1]
# ps: dropout probability passed to every block of each stage.
# NOTE(review): these dropout values do not look like they come from the
# MobileNetV2 paper's architecture table -- confirm their source.
ps = [0, 0.029, 0.057, 0.086, 0.114, 0.143, 0.171]

In [8]:
class MobileNetV2(nn.Module):
    """
    MobileNet V2 classifier assembled from the module-level tables
    ``widths``, ``depths``, ``strides`` and ``ps``.

    Pipeline: 3x3 stride-2 stem conv (3 -> ``widths[0]`` channels) -> one stage
    per entry of ``depths`` (the first built from ``mbconv1``, the rest from
    ``mbconv6``) -> 1x1 conv to ``widths[-1]`` channels -> global average pool
    and flatten -> linear head.

    Args:
        n_classes: number of output features of the final linear layer.
    """
    def __init__(self, n_classes=1000):
        super().__init__()

        # Derive the stage count from the table instead of hard-coding it, so
        # the tables remain the single source of truth for the architecture.
        n_stages = len(depths)

        self.stem = conv_block(3, widths[0], stride=2, padding=1)
        stages = [
            create_stage(widths[i], widths[i + 1], depths[i],
                         layer=mbconv1 if i == 0 else mbconv6,
                         stride=strides[i], ps=ps[i])
            for i in range(n_stages)
        ]
        self.stages = nn.Sequential(*stages)
        # The 1x1 conv takes the last stage's output width (widths[n_stages])
        # and expands it to the final feature width (widths[-1]).
        self.pre = conv_block(widths[n_stages], widths[-1], kernel_size=1)
        self.pool_flatten = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten())
        self.head = nn.Sequential(
            nn.Linear(widths[-1], n_classes)
        )

    def forward(self, x):
        """Run a batch of 3-channel images through the network and return the head output."""
        x = self.stem(x)
        x = self.stages(x)
        x = self.pre(x)
        x = self.pool_flatten(x)
        x = self.head(x)
        return x

In [9]:
# Build the network with the default 1000-output head.
model = MobileNetV2()

In [10]:
%%time
# Smoke test: push a random batch of 16 three-channel 224x224 inputs through
# the model and inspect the output shape.
inp = torch.randn(16, 3, 224, 224)
model(inp).shape

CPU times: user 1.24 s, sys: 951 ms, total: 2.19 s
Wall time: 817 ms


torch.Size([16, 1000])

In [11]:
import os
def print_size_of_model(model):
    """
    Print the size, in megabytes (bytes / 1e6), of the model's serialized state dict.

    The state dict is saved with ``torch.save`` to a file inside a private
    temporary directory, so an existing ``temp.p`` in the working directory is
    never overwritten or deleted, and the file is cleaned up even if saving or
    measuring fails.

    Args:
        model: any object exposing ``state_dict()`` (e.g. an ``nn.Module``).
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Keep the original file name; only its directory changes.
        path = os.path.join(tmp_dir, "temp.p")
        torch.save(model.state_dict(), path)
        print('Size (MB):', os.path.getsize(path)/1e6)

In [12]:
# Report the serialized size of the model's state dict.
print_size_of_model(model)

Size (MB): 14.261773


In [13]:
def fmat(n):
    """Format a count as millions with two decimals, e.g. 3500000 -> '3.50M'."""
    millions = n / 1_000_000
    return f"{millions:.2f}M"

In [14]:
def params(model, f=True):
    """
    Count the trainable parameters of ``model``.

    Only parameters with ``requires_grad`` set are counted.

    Args:
        model: object exposing ``parameters()`` (e.g. an ``nn.Module``).
        f: when truthy, return the count formatted by ``fmat``; otherwise
            return the raw integer.
    """
    trainable = 0
    for p in model.parameters():
        if p.requires_grad:
            trainable += p.numel()

    if f:
        return fmat(trainable)
    return trainable

In [15]:
# Trainable parameter count, formatted in millions.
params(model)

'3.50M'

In [16]:
# Per-layer output shapes and parameter counts for a single 3x224x224 input.
summary(model, (1, 3, 224, 224))

Layer (type:depth-idx)                        Output Shape              Param #
MobileNetV2                                   --                        --
├─Sequential: 1-1                             [1, 32, 112, 112]         --
│    └─Conv2d: 2-1                            [1, 32, 112, 112]         864
│    └─BatchNorm2d: 2-2                       [1, 32, 112, 112]         64
│    └─ReLU6: 2-3                             [1, 32, 112, 112]         --
├─Sequential: 1-2                             [1, 320, 7, 7]            --
│    └─Sequential: 2-4                        [1, 16, 112, 112]         --
│    │    └─MBConv: 3-1                       [1, 16, 112, 112]         896
│    └─Sequential: 2-5                        [1, 24, 56, 56]           --
│    │    └─MBConv: 3-2                       [1, 24, 56, 56]           5,136
│    │    └─MBConv: 3-3                       [1, 24, 56, 56]           8,832
│    └─Sequential: 2-6                        [1, 32, 28, 28]           --
│    │    └─