In [1]:
# Codeblock 1
import torch
import torch.nn as nn
from torchinfo import summary

In [2]:
# Codeblock 2
# Hyperparameters read by the model definition and the demo cells.
WIDTH_MULTIPLIER = 1.0    # factor applied to the conv/bottleneck channel counts
INPUT_RESOLUTION = 224    # height and width of the random input image
NUM_CLASSES = 1000        # output channels of the final classification layer

In [5]:
# Codeblock 3
class SEModule(nn.Module):
    """Squeeze-and-excitation gate that rescales every channel of a feature map.

    Args:
        num_channels: number of channels of the incoming feature map.
        r: reduction ratio; the hidden layer has ``num_channels // r`` units.
    """

    def __init__(self, num_channels, r):
        super().__init__()

        reduced_channels = num_channels // r

        # Submodules are registered in the order forward() applies them.
        self.global_pooling = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc0 = nn.Linear(num_channels, reduced_channels, bias=False)
        self.relu6 = nn.ReLU6()
        self.fc1 = nn.Linear(reduced_channels, num_channels, bias=False)
        self.hardsigmoid = nn.Hardsigmoid()

    def forward(self, x):
        """Return ``x`` multiplied channel-wise by its learned gates.

        The shape comments assume an input of shape (N, C, H, W).
        """
        pooled = self.global_pooling(x)                  #(1)  (N, C, 1, 1)
        channel_stats = pooled.flatten(start_dim=1)      #     (N, C)

        hidden = self.relu6(self.fc0(channel_stats))     #(2)  (N, C // r)
        gates = self.fc1(hidden)                         #(3)  (N, C)
        gates = self.hardsigmoid(gates)                  #(4)  (N, C)

        # Two trailing singleton axes let the gates broadcast over H and W.
        return x * gates[..., None, None]                #(5)  (N, C, H, W)

In [4]:
# Codeblock 4
# Smoke test: push a random 512-channel, 28x28 feature map through an SE module
# built with reduction ratio 4.
semodule = SEModule(512, 4)
x = torch.randn((1, 512, 28, 28))

out = semodule(x)

original		: torch.Size([1, 512, 28, 28])
after avgpool		: torch.Size([1, 512, 1, 1])
after flatten		: torch.Size([1, 512])
after fc0		: torch.Size([1, 128])
after relu6		: torch.Size([1, 128])
after fc1		: torch.Size([1, 512])
after hardsigmoid	: torch.Size([1, 512])
after reshape		: torch.Size([1, 512, 1, 1])
after scaling		: torch.Size([1, 512, 28, 28])


In [8]:
# Codeblock 5
class ConvBlock(nn.Module):
    """Conv2d followed by optional batch normalization and an activation.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels of the convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        groups: number of convolution groups (1 is a regular convolution).
        batchnorm: when True, the conv has no bias and is followed by
            ``nn.BatchNorm2d``; when False, the conv keeps its bias and the
            normalization step is an ``nn.Identity``.
        activation: module applied last. ``None`` (the default) creates a
            fresh ``nn.ReLU6()`` for this block.
    """

    def __init__(self, 
                 in_channels,             #(1)
                 out_channels,            #(2)
                 kernel_size,             #(3)
                 stride,                  #(4)
                 padding,                 #(5)
                 groups=1,                #(6)
                 batchnorm=True,          #(7)
                 activation=None):        #(8)
        super().__init__()

        # A module instance written directly as the default argument would be
        # created once, at definition time, and shared by every block that
        # relies on the default. Build a separate one per block instead.
        if activation is None:
            activation = nn.ReLU6()

        # The conv bias is only needed when no batch norm follows the conv.
        bias = not batchnorm    #(9)

        self.conv = nn.Conv2d(in_channels=in_channels, 
                              out_channels=out_channels,
                              kernel_size=kernel_size, 
                              stride=stride, 
                              padding=padding, 
                              groups=groups,
                              bias=bias)
        self.bn = nn.BatchNorm2d(num_features=out_channels) if batchnorm else nn.Identity()  #(10)
        self.activation = activation

    def forward(self, x):    #(11)
        """Apply convolution, then normalization, then activation to ``x``."""
        x = self.conv(x)
        x = self.bn(x)
        x = self.activation(x)
        return x

In [7]:
# Codeblock 6
# Default configuration: conv -> batch norm -> ReLU6.
convblock1 = ConvBlock(64, 128, kernel_size=3, stride=2, padding=1)

# Same convolution, but without batch norm and with a Hardswish activation.
convblock2 = ConvBlock(64, 128, kernel_size=3, stride=2, padding=1,
                       batchnorm=False,             #(1)
                       activation=nn.Hardswish())   #(2)

# Show both module summaries, separated by one blank line.
print(convblock1, '', convblock2, sep='\n')

ConvBlock(
  (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation): ReLU6()
)

ConvBlock(
  (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bn): Identity()
  (activation): Hardswish()
)


In [11]:
# Codeblock 7a
class Bottleneck(nn.Module):
    """1x1 expansion -> depthwise conv -> optional SE gate -> 1x1 projection.

    Args:
        in_channels: channels of the block input.
        out_channels: channels of the block output.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution.
        padding: padding of the depthwise convolution.
        exp_size: channel count used between the expansion and the projection.
        se: when truthy, an ``SEModule`` with reduction ratio 4 follows the
            depthwise convolution; otherwise that slot is an ``nn.Identity``.
        activation: activation module handed to the expansion and depthwise
            ``ConvBlock``s (the projection has no activation).
    """

    def __init__(self, 
                 in_channels, 
                 out_channels, 
                 kernel_size, 
                 stride,
                 padding,
                 exp_size,     #(1)
                 se,           #(2)
                 activation):
        super().__init__()

        # The skip connection is used only when the depthwise conv has stride 1
        # and the channel count is unchanged.
        self.add = (stride == 1) and (in_channels == out_channels)    #(3)

        # Pointwise expansion: in_channels -> exp_size.
        self.conv0 = ConvBlock(in_channels,       #(4)
                               exp_size,          #(5)
                               kernel_size=1,     #(6)
                               stride=1,
                               padding=0,
                               activation=activation)

        # Depthwise convolution: one group per channel.
        self.conv1 = ConvBlock(exp_size,          #(7)
                               exp_size,          #(8)
                               kernel_size=kernel_size,    #(9)
                               stride=stride,
                               padding=padding,
                               groups=exp_size,   #(10)
                               activation=activation)

        if se:                                    #(11)
            self.semodule = SEModule(num_channels=exp_size, r=4)
        else:
            self.semodule = nn.Identity()

        # Pointwise projection: exp_size -> out_channels, no activation.
        self.conv2 = ConvBlock(exp_size,          #(12)
                               out_channels,      #(13)
                               kernel_size=1,     #(14)
                               stride=1,
                               padding=0,
                               activation=nn.Identity())    #(15)

    # Codeblock 7b
    def forward(self, x):
        """Run the block on ``x``, adding ``x`` back in when ``self.add`` is set."""
        out = self.conv0(x)
        out = self.conv1(out)
        out = self.semodule(out)
        out = self.conv2(out)

        if self.add:
            # In-place accumulation of the block input onto the projection output.
            out += x

        return out

In [10]:
# Codeblock 8
# Downsampling bottleneck without SE: 16 -> 24 channels, expanded to 64 inside.
bottleneck = Bottleneck(in_channels=16,
                        out_channels=24,        #(1)
                        kernel_size=3,          #(2)
                        stride=2,               #(4)
                        padding=1,
                        exp_size=64,            #(3)
                        se=False,               #(5)
                        activation=nn.ReLU6())  #(6)

# Random 16-channel, 112x112 feature map used as the block input.
x = torch.randn((1, 16, 112, 112))              #(7)
out = bottleneck(x)

original		: torch.Size([1, 16, 112, 112])
after conv0		: torch.Size([1, 64, 112, 112])
after conv1		: torch.Size([1, 64, 56, 56])
after semodule		: torch.Size([1, 64, 56, 56])
after conv2		: torch.Size([1, 24, 56, 56])


In [12]:
# Codeblock 9
# Display the module tree of the bottleneck built in the previous cell.
bottleneck

Bottleneck(
  (conv0): ConvBlock(
    (conv): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): ReLU6()
  )
  (conv1): ConvBlock(
    (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): ReLU6()
  )
  (semodule): Identity()
  (conv2): ConvBlock(
    (conv): Conv2d(64, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): Identity()
  )
)

In [13]:
# Codeblock 10
# Downsampling bottleneck with a 5x5 depthwise kernel and the SE gate enabled:
# 24 -> 40 channels, expanded to 72 inside.
bottleneck = Bottleneck(in_channels=24,
                        out_channels=40,
                        kernel_size=5,
                        stride=2,
                        padding=2,
                        exp_size=72,
                        se=True,
                        activation=nn.ReLU6())

# Display the resulting module tree.
bottleneck

Bottleneck(
  (conv0): ConvBlock(
    (conv): Conv2d(24, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): ReLU6()
  )
  (conv1): ConvBlock(
    (conv): Conv2d(72, 72, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=72, bias=False)
    (bn): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): ReLU6()
  )
  (semodule): SEModule(
    (global_pooling): AdaptiveAvgPool2d(output_size=(1, 1))
    (fc0): Linear(in_features=72, out_features=18, bias=False)
    (relu6): ReLU6()
    (fc1): Linear(in_features=18, out_features=72, bias=False)
    (hardsigmoid): Hardsigmoid()
  )
  (conv2): ConvBlock(
    (conv): Conv2d(72, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (activation): Identity()
  )
)

In [14]:
# Codeblock 11
# Activation modules referenced by the rows of the table below.
HS = nn.Hardswish()
RE = nn.ReLU6()

# One row per bottleneck, in network order. Columns:
#   in_channels, out_channels, kernel_size, exp_size, se, activation, stride, padding
BOTTLENECKS = [
    [16,  16,  3, 16,  False, RE, 1, 1],
    [16,  24,  3, 64,  False, RE, 2, 1],
    [24,  24,  3, 72,  False, RE, 1, 1],
    [24,  40,  5, 72,  True,  RE, 2, 2],
    [40,  40,  5, 120, True,  RE, 1, 2],
    [40,  40,  5, 120, True,  RE, 1, 2],
    [40,  80,  3, 240, False, HS, 2, 1],
    [80,  80,  3, 200, False, HS, 1, 1],
    [80,  80,  3, 184, False, HS, 1, 1],
    [80,  80,  3, 184, False, HS, 1, 1],
    [80,  112, 3, 480, True,  HS, 1, 1],
    [112, 112, 3, 672, True,  HS, 1, 1],
    [112, 160, 5, 672, True,  HS, 2, 2],
    [160, 160, 5, 960, True,  HS, 1, 2],
    [160, 160, 5, 960, True,  HS, 1, 2],
]

In [15]:
# Codeblock 12a
class MobileNetV3(nn.Module):
    """MobileNetV3 classifier assembled from ``ConvBlock`` and ``Bottleneck``.

    The layer layout comes from the module-level ``BOTTLENECKS`` table; channel
    counts are multiplied by ``WIDTH_MULTIPLIER`` and truncated with ``int``;
    the final layer has ``NUM_CLASSES`` output channels.

    Args:
        dropout: probability of zeroing an element in the dropout layer that
            precedes the final classification layer.
    """

    def __init__(self, dropout=0.2):
        super().__init__()
        
        # Stem: strided 3x3 convolution on the 3-channel input image.
        self.first_conv = ConvBlock(in_channels=3,    #(1)
                                    out_channels=int(WIDTH_MULTIPLIER*16),
                                    kernel_size=3,
                                    stride=2,
                                    padding=1, 
                                    activation=nn.Hardswish())
        
        # One Bottleneck per row of the configuration table, in order.
        self.blocks = nn.ModuleList([])    #(2)
        for config in BOTTLENECKS:         #(3)
            in_channels, out_channels, kernel_size, exp_size, se, activation, stride, padding = config
            self.blocks.append(Bottleneck(in_channels=int(WIDTH_MULTIPLIER*in_channels), 
                                          out_channels=int(WIDTH_MULTIPLIER*out_channels), 
                                          kernel_size=kernel_size, 
                                          exp_size=int(WIDTH_MULTIPLIER*exp_size), 
                                          stride=stride, 
                                          padding=padding, 
                                          se=se, 
                                          activation=activation))
        
        self.second_conv = ConvBlock(in_channels=int(WIDTH_MULTIPLIER*160), #(4)
                                     out_channels=int(WIDTH_MULTIPLIER*960),
                                     kernel_size=1,
                                     stride=1,
                                     padding=0, 
                                     activation=nn.Hardswish())
        
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))              #(5)
        
        # After pooling the spatial size is 1x1, so the remaining 1x1
        # convolutions act on one feature vector per image.
        self.third_conv = ConvBlock(in_channels=int(WIDTH_MULTIPLIER*960),  #(6)
                                    out_channels=int(WIDTH_MULTIPLIER*1280),
                                    kernel_size=1,
                                    stride=1,
                                    padding=0, 
                                    batchnorm=False,
                                    activation=nn.Hardswish())
        
        # nn.Dropout takes the probability of *zeroing* an element, so the
        # default of 0.2 keeps 80% of the features (torchvision's MobileNetV3
        # uses the same value). A value of 0.8 here would drop 80% of them.
        self.dropout = nn.Dropout(p=dropout)    #(7)
        
        self.output = ConvBlock(in_channels=int(WIDTH_MULTIPLIER*1280),     #(8)
                                out_channels=int(NUM_CLASSES),              #(9)
                                kernel_size=1,
                                stride=1,
                                padding=0, 
                                batchnorm=False,
                                activation=nn.Identity())
        
    # Codeblock 12b
    def forward(self, x):
        """Run the network on ``x`` and return the flattened output.

        The tensor size is printed after every stage so the shape flow can be
        followed in the notebook output.
        """
        print(f'original\t\t: {x.size()}')

        x = self.first_conv(x)
        print(f'after first_conv\t: {x.size()}')
        
        for i, block in enumerate(self.blocks):
            x = block(x)
            print(f"after bottleneck #{i}\t: {x.shape}")
        
        x = self.second_conv(x)
        print(f'after second_conv\t: {x.size()}')
        
        x = self.avgpool(x)
        print(f'after avgpool\t\t: {x.size()}')
        
        x = self.third_conv(x)
        print(f'after third_conv\t: {x.size()}')
        
        x = self.dropout(x)
        print(f'after dropout\t\t: {x.size()}')
        
        x = self.output(x)
        print(f'after output\t\t: {x.size()}')
        
        # Collapse every non-batch dimension into one.
        x = torch.flatten(x, start_dim=1)
        print(f'after flatten\t\t: {x.size()}')
            
        return x

In [16]:
# Codeblock 13
# Build the full network and run one random image-sized batch through it;
# the forward pass prints the tensor size after every stage.
mobilenetv3 = MobileNetV3()

x = torch.randn((1, 3, INPUT_RESOLUTION, INPUT_RESOLUTION))
out = mobilenetv3(x)

original		: torch.Size([1, 3, 224, 224])
after first_conv	: torch.Size([1, 16, 112, 112])
after bottleneck #0	: torch.Size([1, 16, 112, 112])
after bottleneck #1	: torch.Size([1, 24, 56, 56])
after bottleneck #2	: torch.Size([1, 24, 56, 56])
after bottleneck #3	: torch.Size([1, 40, 28, 28])
after bottleneck #4	: torch.Size([1, 40, 28, 28])
after bottleneck #5	: torch.Size([1, 40, 28, 28])
after bottleneck #6	: torch.Size([1, 80, 14, 14])
after bottleneck #7	: torch.Size([1, 80, 14, 14])
after bottleneck #8	: torch.Size([1, 80, 14, 14])
after bottleneck #9	: torch.Size([1, 80, 14, 14])
after bottleneck #10	: torch.Size([1, 112, 14, 14])
after bottleneck #11	: torch.Size([1, 112, 14, 14])
after bottleneck #12	: torch.Size([1, 160, 7, 7])
after bottleneck #13	: torch.Size([1, 160, 7, 7])
after bottleneck #14	: torch.Size([1, 160, 7, 7])
after second_conv	: torch.Size([1, 960, 7, 7])
after avgpool		: torch.Size([1, 960, 1, 1])
after third_conv	: torch.Size([1, 1280, 1, 1])
after dropout		:

In [17]:
# Codeblock 14
# Total number of elements across all parameter tensors of the model.
total_params = sum(map(torch.numel, mobilenetv3.parameters()))
total_params

5476416