**MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications**    
*Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam*   
[[paper](https://arxiv.org/abs/1704.04861)]   
arXiv preprint, 2017   

In [2]:
import torch
import torch.nn as nn


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class DepthwiseSeparableConv(nn.Module):
    """Depthwise separable convolution block.

    Applies a 3x3 grouped convolution (one group per input channel), batch
    normalization and ReLU, followed by a 1x1 pointwise convolution, batch
    normalization and ReLU.

    Args:
        in_dim: Number of input channels; also used as the number of groups
            of the 3x3 convolution.
        hidden_dim: Number of channels produced by the 3x3 convolution.
            ``nn.Conv2d`` requires it to be a multiple of ``in_dim``; pass
            ``hidden_dim == in_dim`` for a plain depthwise convolution.
        out_dim: Number of channels produced by the 1x1 convolution.
        stride: Stride of the 3x3 convolution (the 1x1 convolution always
            uses stride 1).
    """

    def __init__(self, in_dim: int, hidden_dim: int, out_dim: int, stride: int = 1) -> None:
        super(DepthwiseSeparableConv, self).__init__()

        # A 3x3 kernel needs padding=1 regardless of stride so that the output
        # is H for stride 1 and ceil(H / 2) for stride 2.  Tying the padding to
        # the stride (padding=2 at stride 2) yields e.g. 57 instead of 56 for
        # a 112-pixel input.
        self.depthwise = nn.Conv2d(
            in_channels=in_dim,
            out_channels=hidden_dim,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_dim,
            bias=False,  # bias is redundant directly before BatchNorm
        )
        self.norm1 = nn.BatchNorm2d(hidden_dim)

        self.pointwise = nn.Conv2d(
            in_channels=hidden_dim,
            out_channels=out_dim,
            kernel_size=1,
            stride=1,
            bias=False,  # bias is redundant directly before BatchNorm
        )
        self.norm2 = nn.BatchNorm2d(out_dim)

        # A single stateless ReLU module is reused after both normalizations.
        self.act = nn.ReLU()

    def forward(self, x):
        """Run depthwise conv -> BN -> ReLU -> pointwise conv -> BN -> ReLU on ``x``."""
        d = self.depthwise(x)
        d = self.norm1(d)
        d = self.act(d)

        p = self.pointwise(d)
        p = self.norm2(p)
        p = self.act(p)

        return p

In [None]:
class MobileNetv1(nn.Module):
    """MobileNet v1 classifier assembled from ``DepthwiseSeparableConv`` blocks.

    The network is a strided 3x3 stem convolution, thirteen depthwise
    separable blocks grouped into five stages, global average pooling and a
    1x1 convolution acting as the classifier.  The spatial sizes quoted in
    the comments below are for a 224x224 input with the default widths.

    Args:
        init_dim: Channel count of the stem convolution before scaling.
        num_classes: Number of output channels of the final 1x1 convolution.
        alpha: Width multiplier applied to ``init_dim`` (the product is
            truncated to an int); every later stage doubles that width.

    Raises:
        ValueError: If ``int(alpha * init_dim)`` is smaller than 1.
    """

    def __init__(self, init_dim=32, num_classes=1000, alpha=1) -> None:
        super(MobileNetv1, self).__init__()

        # ``self.dim`` tracks the current stage width while the layers are
        # built and ends up holding the width of the final feature map.
        self.dim = int(alpha * init_dim)
        if self.dim < 1:
            raise ValueError(
                f"alpha * init_dim must be at least 1, got {alpha} * {init_dim}"
            )

        # 224x224x3 -> 112x112x32 (3x3 kernel, stride 2 needs padding=1)
        self.init_conv = nn.Conv2d(3, self.dim, kernel_size=3, stride=2, padding=1)
        self.init_norm = nn.BatchNorm2d(self.dim)
        self.init_act  = nn.ReLU()

        # 112x112x32 -> 112x112x64 -> 56x56x128
        self.dim *= 2 # 64
        self.dwise_conv1 = nn.Sequential(
            # The depthwise stage keeps the incoming channel count, so
            # hidden_dim must equal in_dim here.
            DepthwiseSeparableConv(in_dim=self.dim//2, hidden_dim=self.dim//2, out_dim=self.dim,   stride=1),
            DepthwiseSeparableConv(in_dim=self.dim,    hidden_dim=self.dim,    out_dim=self.dim*2, stride=2)
        )

        # 56x56x128 -> 28x28x256
        self.dim *= 2 # 128
        self.dwise_conv2 = nn.Sequential(
            DepthwiseSeparableConv(in_dim=self.dim, hidden_dim=self.dim, out_dim=self.dim,   stride=1),
            DepthwiseSeparableConv(in_dim=self.dim, hidden_dim=self.dim, out_dim=self.dim*2, stride=2)
        )

        # 28x28x256 -> 14x14x512
        self.dim *= 2 # 256
        self.dwise_conv3 = nn.Sequential(
            DepthwiseSeparableConv(in_dim=self.dim, hidden_dim=self.dim, out_dim=self.dim,   stride=1),
            DepthwiseSeparableConv(in_dim=self.dim, hidden_dim=self.dim, out_dim=self.dim*2, stride=2)
        )

        # 14x14x512 -> 7x7x1024 (five width-preserving blocks, then downsample)
        self.dim *= 2 # 512
        self.dwise_conv4 = nn.Sequential(
            *([DepthwiseSeparableConv(in_dim=self.dim, hidden_dim=self.dim, out_dim=self.dim, stride=1) for _ in range(5)]
            +[DepthwiseSeparableConv(in_dim=self.dim, hidden_dim=self.dim, out_dim=self.dim*2, stride=2)])
        )

        # 7x7x1024 -> 7x7x1024; stride 1 keeps the 7x7 resolution stated here
        self.dim *= 2 # 1024
        self.dwise_conv5 = DepthwiseSeparableConv(in_dim=self.dim, hidden_dim=self.dim, out_dim=self.dim, stride=1)

        # 7x7x1024 -> 1x1x1024
        self.pool = nn.AdaptiveAvgPool2d(output_size=1)

        # 1x1x1024 -> 1x1x1000 (# of classes)
        self.fc = nn.Conv2d(self.dim, num_classes, kernel_size=1, stride=1)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.init_conv(x)
        x = self.init_norm(x)
        x = self.init_act(x)

        h = self.dwise_conv1(x)
        h = self.dwise_conv2(h)
        h = self.dwise_conv3(h)
        h = self.dwise_conv4(h)
        h = self.dwise_conv5(h)

        p = self.pool(h)

        out = self.fc(p)

        # Drop the 1x1 spatial dimensions left by the global pooling.
        return out.view(out.size(0), -1)