In [99]:
# import necessary libraries
import torch
import torch.nn as nn
import torch.nn.functional as F

In [100]:
# Activation function (hard-swish); it is slightly better than ReLU.
# We use h-swish in the second half of the model, since
# the cost of applying a nonlinearity decreases as we go
# deeper into the network.
class H_wsish(nn.Module):
    """Hard-swish activation, computed as ``x * ReLU6(x + 3) / 6``.

    The module has no learnable parameters; the only submodule is the
    ``ReLU6`` used to clamp ``x + 3`` into ``[0, 6]``.
    """

    def __init__(self):
        super().__init__()
        self.relu = nn.ReLU6()

    def forward(self, x):
        """Apply hard-swish element-wise and return a tensor shaped like ``x``."""
        clamped = self.relu(x + 3.0)
        return x * clamped / 6

In [101]:
# This function is taken from the original tf repo.
# It ensures that all layers have a channel number that is divisible by 8
# It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
def make_divisible(x, divisor=8, min_value=None):
    """Round ``x`` to a nearby multiple of ``divisor``.

    Args:
        x: the value (typically a channel count) to round.
        divisor: the result is a multiple of this number.
        min_value: lower bound for the result; defaults to ``divisor``.

    Returns:
        ``x`` rounded to the nearest multiple of ``divisor`` (never below
        ``min_value``), bumped up by one ``divisor`` when rounding would
        otherwise lose more than 10% of ``x``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(x + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest_multiple)
    # Rounding down must not shrink the value by more than 10%.
    if result < 0.9 * x:
        result = result + divisor
    return result

In [102]:
# create CNNBlock to simplify implementation
class CNNBlock(nn.Module):
    """Conv2d -> optional BatchNorm2d -> activation.

    Only the layers that are actually used get instantiated: with
    ``bn=False`` no ``BatchNorm2d`` is created (``self.bn`` is an
    ``nn.Identity``), so the block carries no dead parameters in
    ``parameters()`` / ``state_dict()``.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels of the convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        act: ``'HS'`` selects hard-swish (``H_wsish``); any other value
            selects ``nn.ReLU6``.
        bn: whether to apply batch normalization after the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (e.g. ``padding``,
            ``groups``).
    """

    def __init__(self,in_channels,out_channels,kernel_size,stride,act,bn=True,**kwargs):
        super(CNNBlock,self).__init__()
        self.bn_true = bn
        self.act = act
        self.conv = nn.Conv2d(in_channels,out_channels,kernel_size,stride,**kwargs)
        # Skip the norm layer entirely when it is disabled instead of
        # registering an unused BatchNorm2d.
        self.bn = nn.BatchNorm2d(out_channels) if bn else nn.Identity()
        # Choose the activation once at construction time.
        self.activation = H_wsish() if act == 'HS' else nn.ReLU6()

    def forward(self,x):
        """Run convolution, (optional) batch norm and the activation on ``x``."""
        x = self.conv(x)
        x = self.bn(x)
        return self.activation(x)

In [103]:
# Squeeze-and-excite block.
# The size of the SEBlock used to be relative to the size of the convolutional bottleneck,
# but here it is fixed to 1/4 of the number of channels in the expansion layer.
# This increases accuracy at the cost of a modest increase in the number of
# parameters, with no discernible latency cost.
class SEBlock(nn.Module):
    """Squeeze-and-excite block: per-channel re-weighting of a feature map.

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bottleneck MLP, and the result is used as a
    per-channel scale on the input.

    The final non-linearity is a hard-sigmoid, ``ReLU6(x + 3) / 6``, so the
    scale is a proper gate in ``[0, 1]``. Hard-swish must not be used here:
    it is unbounded above and would let the block amplify channels
    arbitrarily instead of gating them.

    Args:
        in_channels: number of channels of the feature map.
        divide: reduction factor of the hidden layer
            (hidden width is ``in_channels // divide``).
    """

    def __init__(self,in_channels,divide=4):
        super(SEBlock,self).__init__()

        self.dense = nn.Sequential(
            nn.Linear(in_channels, in_channels // divide),
            nn.ReLU(inplace=True),
            nn.Linear(in_channels // divide, in_channels),
            nn.Hardsigmoid(),
        )

    def forward(self,x):
        """Scale ``x`` of shape (batch, channels, height, width) channel-wise."""
        b, c, _, _ = x.size()
        # Squeeze: global average over the spatial dimensions -> (b, c).
        squeezed = x.mean(dim=(2, 3))
        # Excite: per-channel gate in [0, 1], broadcast over height and width.
        gate = self.dense(squeezed).view(b, c, 1, 1)
        return gate * x


In [104]:
# MobileNet_v2 bottleneck + SEBlock
class BottleNeck(nn.Module):
    """Inverted-residual bottleneck with an optional squeeze-and-excite block.

    Pipeline: 1x1 expansion (``pointwise``) -> depthwise conv (``depthwise``)
    -> optional ``SEBlock`` -> 1x1 linear projection (``conv``). A skip
    connection is added when ``stride == 1`` and the input and output
    channel counts match.

    Notes:
        * The projection is *linear* (convolution + batch norm, no
          activation). Applying a non-linearity on the narrow projected
          tensor would clip the values that are added back to the residual.
        * The ``SEBlock`` is only instantiated when ``SE`` is true, so
          blocks without squeeze-and-excite carry no unused parameters;
          ``self.SEBlock`` is ``None`` in that case.

    Args:
        in_channels: channels of the block input.
        out_channels: channels of the block output.
        kernel_size: kernel size of the depthwise convolution.
        act: non-linearity tag forwarded to ``CNNBlock`` (``'HS'`` or other).
        SE: whether to insert a squeeze-and-excite block after the depthwise conv.
        stride: stride of the depthwise convolution.
        exp: number of channels after the expansion layer.
    """

    def __init__(self,in_channels,out_channels,kernel_size,act,SE,stride,exp):
        super(BottleNeck,self).__init__()
        self.exp = exp
        self.stride = stride
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.SE = SE
        # "Same"-style padding for odd kernel sizes.
        self.padding = (self.kernel_size - 1) // 2
        self.act = act

        # 1x1 expansion to `exp` channels.
        self.pointwise = CNNBlock(in_channels,exp,kernel_size=1,stride=1,act=self.act)
        # Depthwise conv: one filter group per channel.
        self.depthwise = CNNBlock(exp,exp,self.kernel_size,stride=self.stride,padding=self.padding,
                                    groups=exp,act=self.act)
        # Only build the SE block when it is actually part of the pipeline.
        self.SEBlock = SEBlock(exp) if self.SE else None
        # Linear 1x1 projection: no activation; the conv bias is redundant before BN.
        self.conv = nn.Sequential(
            nn.Conv2d(exp,out_channels,kernel_size=1,stride=1,bias=False),
            nn.BatchNorm2d(out_channels),
        )

        layers = [self.pointwise, self.depthwise]
        if self.SEBlock is not None:
            layers.append(self.SEBlock)
        layers.append(self.conv)

        self.layers = nn.Sequential(*layers)

    #condition of using of skip connections
    def forward(self,x):
        """Apply the bottleneck; add the input back when shapes allow it."""
        out = self.layers(x)
        if self.stride==1 and self.in_channels==self.out_channels:
            out = x + out
        return out

In [105]:
# At the end of the network we can drop 3 expensive layers with no loss of accuracy;
# this is called the efficient last stage.
# For more details, see the paper: https://arxiv.org/pdf/1905.02244.pdf
class MobileNet_v3(nn.Module):
    """MobileNetV3 classifier assembled from a bottleneck configuration.

    Layout: stem conv (``conv1``) -> bottleneck stack (``model``) ->
    1x1 conv (``conv2``) -> global average pool -> 1x1 conv without batch
    norm (``conv3``) -> 1x1 classifier conv (``conv4``) -> flatten.

    Args:
        cnfg: iterable of rows
            ``(in_channels, out_channels, stride, kernel_size, use_SE,
            nonlinearity, expansion_size)``, one per bottleneck.
        img_channels: number of channels of the input image.
        num_classes: number of output logits.
        size: ``'large'`` selects the large head (160 -> 960 -> 1280);
            any other value selects the small head (96 -> 576 -> 1024).
    """

    def __init__(self,cnfg,img_channels,num_classes,size):
        super(MobileNet_v3,self).__init__()

        self.out_channels = make_divisible(16*1.0)

        # (channels leaving the last bottleneck, conv2 width, conv3 width)
        if size=='large':
            last_bottleneck, head_width, feature_width = 160, 960, 1280
        else:
            last_bottleneck, head_width, feature_width = 96, 576, 1024

        # NOTE(review): the stem conv is unpadded, so a 224x224 input becomes
        # 111x111 rather than 112x112 — confirm this is intended.
        self.conv1 = CNNBlock(img_channels,self.out_channels,kernel_size=3,stride=2,act='HS')
        self.conv2 = CNNBlock(make_divisible(last_bottleneck*1.0),head_width,kernel_size=1,stride=1,act='HS')
        self.conv3 = CNNBlock(make_divisible(head_width*1.0),feature_width,kernel_size=1,stride=1,bn=False,act='HS')
        self.conv4 = nn.Conv2d(make_divisible(feature_width*1.0),num_classes,kernel_size=1,stride=1)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.bottlenecks = cnfg

        model = []
        for inp,out,s,ker_size,SE,NL,exp_size in self.bottlenecks:
            inp = make_divisible(inp*1.0)
            out = make_divisible(out*1.0)
            exp_size = make_divisible(exp_size*1.0)
            model.append(BottleNeck(inp,out,kernel_size=ker_size,act=NL,SE=SE,stride=s,exp=exp_size))

        self.model = nn.Sequential(*model)

    def forward(self,x):
        """Return logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.conv1(x)
        x = self.model(x)
        x = self.conv2(x)
        x = self.avg_pool(x)
        x = self.conv3(x)
        x = self.conv4(x)
        # Flatten (batch, num_classes, 1, 1) by batch size rather than a
        # hard-coded class count, so any `num_classes` works.
        x = x.view(x.size(0), -1)
        return x


In [106]:
# create MobileNet_v3_large
def mobile_net_v3_large(img_channels=3,num_classes=1000):
    """Build the 'large' ``MobileNet_v3`` variant.

    Args:
        img_channels: number of channels of the input image.
        num_classes: number of output classes.

    Returns:
        A ``MobileNet_v3`` constructed with the 15-row bottleneck table below
        and ``size='large'``.
    """
    # Row layout:
    # [input channels, output channels, stride, kernel size,
    #  use SEBlock, type of nonlinearity, expansion size]
    bottleneck_rows = [
        [16, 16, 1, 3, False, 'RE', 16],
        [16, 24, 2, 3, False, 'RE', 64],
        [24, 24, 1, 3, False, 'RE', 72],
        [24, 40, 2, 5, True, 'RE', 72],
        [40, 40, 1, 5, True, 'RE', 120],
        [40, 40, 1, 5, True, 'RE', 120],
        [40, 80, 2, 3, False, 'HS', 240],
        [80, 80, 1, 3, False, 'HS', 200],
        [80, 80, 1, 3, False, 'HS', 184],
        [80, 80, 1, 3, False, 'HS', 184],
        [80, 112, 1, 3, True, 'HS', 480],
        [112, 112, 1, 3, True, 'HS', 672],
        [112, 160, 2, 5, True, 'HS', 672],
        [160, 160, 1, 5, True, 'HS', 960],
        [160, 160, 1, 5, True, 'HS', 960],
    ]

    return MobileNet_v3(
        cnfg=bottleneck_rows,
        img_channels=img_channels,
        num_classes=num_classes,
        size='large',
    )

In [107]:
# create MobileNet_v3_small
def mobile_net_v3_small(img_channels=3,num_classes=1000):
    """Build the 'small' ``MobileNet_v3`` variant.

    Args:
        img_channels: number of channels of the input image.
        num_classes: number of output classes.

    Returns:
        A ``MobileNet_v3`` constructed with the 11-row bottleneck table below
        and ``size='small'``.
    """
    # Row layout:
    # [input channels, output channels, stride, kernel size,
    #  use SEBlock, type of nonlinearity, expansion size]
    bottleneck_rows = [
        [16, 16, 2, 3, True, 'RE', 16],
        [16, 24, 2, 3, False, 'RE', 72],
        [24, 24, 1, 3, False, 'RE', 88],
        [24, 40, 2, 5, True, 'HS', 96],
        [40, 40, 1, 5, True, 'HS', 240],
        [40, 40, 1, 5, True, 'HS', 240],
        [40, 48, 1, 5, True, 'HS', 120],
        [48, 48, 1, 5, True, 'HS', 144],
        [48, 96, 2, 5, True, 'HS', 288],
        [96, 96, 1, 5, True, 'HS', 576],
        [96, 96, 1, 5, True, 'HS', 576],
    ]

    return MobileNet_v3(
        cnfg=bottleneck_rows,
        img_channels=img_channels,
        num_classes=num_classes,
        size='small',
    )

In [108]:
# test the net architecture
def test():
    """Smoke-test the small network on a random batch.

    Builds ``mobile_net_v3_small()`` with its default arguments, runs a
    forward pass on a random ``(2, 3, 224, 224)`` batch, prints the output
    shape and asserts that it is ``(2, 1000)``.

    Raises:
        AssertionError: if the output shape is not ``(2, 1000)``.
    """
    net = mobile_net_v3_small()
    x = torch.rand(2,3,224,224)
    # A shape check needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        y = net(x)
    print(y.shape)
    assert y.shape == (2, 1000), f"unexpected output shape: {tuple(y.shape)}"

In [109]:
# Run the smoke test defined above; it prints the shape of the network output.
test()

torch.Size([2, 1000])
