<a href="https://colab.research.google.com/github/CelikAbdullah/deep-learning-notebooks/blob/main/Computer%20Vision/models/MobileNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
from tensorflow import keras

# MobileNet v1

## The stem component

### The depthwise separable conv block

In [36]:
def depthwise_block(x, n_filters, alpha, strides):
  """ Build a depthwise separable convolution block:
      3x3 depthwise conv -> BN -> ReLU6 -> 1x1 pointwise conv -> BN -> ReLU6.

      x         : input tensor to the block
      n_filters : number of pointwise filters before width scaling
      alpha     : width multiplier; the pointwise convolution gets
                  int(n_filters * alpha) filters
      strides   : strides of the depthwise convolution, either a single int
                  or a pair of ints (tuple or list)

      Returns the output tensor of the block.
  """
  # normalize strides so that 2, [2, 2] and (2, 2) all select the same branch below
  try:
    strides = tuple(strides)
  except TypeError:
    strides = (strides, strides)

  # apply the width multiplier to the number of pointwise filters
  filters = int(n_filters * alpha)

  # a stride-2 depthwise convolution is padded explicitly (one extra row at the
  # bottom, one extra column on the right) and then run with 'valid' padding
  if strides == (2, 2):
    x = keras.layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(x)
    padding = 'valid'
  else:
    padding = 'same'

  # apply depthwise convolution
  x = keras.layers.DepthwiseConv2D(kernel_size=3, strides=strides, padding=padding, use_bias=False, kernel_initializer='glorot_uniform')(x)
  x = keras.layers.BatchNormalization()(x)
  x = keras.layers.ReLU(6.0)(x)

  # apply pointwise convolution
  x = keras.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='glorot_uniform')(x)
  x = keras.layers.BatchNormalization()(x)
  x = keras.layers.ReLU(6.0)(x)

  return x

### Stem Component

In [37]:
def stem(inputs, alpha):
  """ Build the stem component: a strided 3x3 convolution followed by one
      depthwise separable convolution block.

      inputs : input tensor
      alpha  : width multiplier; the stem convolution gets int(32 * alpha) filters

      Returns the output tensor of the stem.
  """
  # the filter count must be an integer; without the cast a fractional alpha
  # would hand a float to Conv2D (depthwise_block applies the same cast)
  n_filters = int(32 * alpha)

  # pad one row at the bottom and one column on the right, then run the
  # strided convolution with 'valid' padding
  x = keras.layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs)
  x = keras.layers.Conv2D(filters=n_filters, kernel_size=3, strides=2, padding='valid', use_bias=False, kernel_initializer='glorot_uniform')(x)
  x = keras.layers.BatchNormalization()(x)
  x = keras.layers.ReLU(6.0)(x)

  # Depthwise Separable Convolution Block
  x = depthwise_block(x, 64, alpha, (1, 1))

  return x

## The learner component

### Depthwise Separable Convolution Group

In [38]:
def group(x, n_filters, n_blocks, alpha):
  """ Stack depthwise separable convolution blocks into one group.

      x         : input tensor to the group
      n_filters : number of filters passed to every block
      n_blocks  : number of blocks in the group
      alpha     : width multiplier passed to every block

      Returns the output tensor of the last block.
  """
  # only the leading block is strided (it reduces the feature map size);
  # every block after it keeps the resolution
  per_block_strides = [(2, 2)] + [(1, 1)] * (n_blocks - 1)

  for block_strides in per_block_strides:
    x = depthwise_block(x, n_filters, alpha, strides=block_strides)

  return x

### Learner component

In [39]:
def learner(x, alpha):
  """ Build the learner component: four depthwise separable convolution groups.

      x      : input tensor to the learner
      alpha  : width multiplier passed to every group

      Returns the output tensor of the last group.
  """
  # (number of filters, number of blocks) for each group, in order
  group_specs = (
    (128, 2),
    (256, 2),
    (512, 6),
    (1024, 2),
  )

  for n_filters, n_blocks in group_specs:
    x = group(x, n_filters, n_blocks, alpha)

  return x

## The task component

In [40]:
def task(x, alpha, dropout, n_classes):
  """ Build the task (classifier) component.

      x         : input tensor to the classifier
      alpha     : width multiplier; int(1024 * alpha) is used as the number
                  of pooled features
      dropout   : dropout rate
      n_classes : number of output classes

      Returns a tensor of shape (?, n_classes) holding softmax scores.
  """
  # average every feature map down to a single value -> (?, N)
  pooled = keras.layers.GlobalAveragePooling2D()(x)

  # restore two unit spatial dimensions so a 1x1 convolution can be applied
  n_features = int(1024 * alpha)
  features = keras.layers.Reshape((1, 1, n_features))(pooled)

  # dropout against overfitting
  features = keras.layers.Dropout(dropout)(features)

  # a 1x1 convolution with softmax acts as the fully connected classifier
  classifier = keras.layers.Conv2D(filters=n_classes, kernel_size=1, padding='same', activation='softmax', kernel_initializer='glorot_uniform')
  scores = classifier(features)

  # drop the unit spatial dimensions again -> (?, n_classes)
  return keras.layers.Reshape((n_classes, ))(scores)

## The MobileNet v1 model

In [41]:
# width multiplier (0 .. 1): values below 1 reduce the number of filters
alpha = 1
# resolution multiplier (0 .. 1): values below 1 reduce the input size
pho = 1
# dropout rate
dropout = 0.5
# number of output classes
classes = 1000

In [42]:
def build_mobilenet_v1(alpha=1, pho=1, dropout=0.5, classes=1000):
  """ Assemble the MobileNet v1 model from its stem, learner and task components.

      alpha   : width multiplier forwarded to every component
      pho     : resolution multiplier; the input is int(224 * pho) pixels per side
      dropout : dropout rate used by the task component
      classes : number of output classes

      Returns a keras.Model named "MobileNet-v1".
  """
  # NOTE: stem, learner and task are looked up when this function is called.
  # Later cells of this notebook redefine those names for MobileNet v2, so
  # build the v1 model before those cells are executed.

  # square RGB input, scaled by the resolution multiplier
  side = int(224 * pho)
  inputs = keras.Input(shape=(side, side, 3))

  # stem -> learner -> task
  features = learner(stem(inputs, alpha), alpha)
  outputs = task(features, alpha, dropout, classes)

  return keras.Model(inputs=inputs, outputs=outputs, name="MobileNet-v1")

In [43]:
# create the MobileNet v1 model from the hyperparameters set in the configuration cell above
# (previously the builder was called with its defaults, so editing that cell had no effect)
mobilenet_v1_model = build_mobilenet_v1(alpha=alpha, pho=pho, dropout=dropout, classes=classes)
# print a summary of the MobileNet v1 model
mobilenet_v1_model.summary()

Model: "MobileNet-v1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 zero_padding2d_16 (ZeroPad  (None, 225, 225, 3)       0         
 ding2D)                                                         
                                                                 
 conv2d_66 (Conv2D)          (None, 112, 112, 32)      864       
                                                                 
 batch_normalization_107 (B  (None, 112, 112, 32)      128       
 atchNormalization)                                              
                                                                 
 re_lu_90 (ReLU)             (None, 112, 112, 32)      0         
                                                                 
 depthwise_conv2d_43 (Depth  (None, 112, 112, 32)     

# MobileNet v2

## The stem component

In [44]:
def stem(inputs, alpha):
  """ Build the stem of MobileNet v2: a strided 3x3 convolution with
      batch normalization and ReLU6.

      inputs : input tensor
      alpha  : width multiplier applied to the 32 base filters

      Returns the output tensor of the stem.
  """
  # round the width-scaled filter count to a multiple of 8, never below 8
  scaled = int(32 * alpha)
  n_filters = max(8, ((scaled + 4) // 8) * 8)

  # pad one row at the bottom and one column on the right so the strided
  # convolution can run with 'valid' padding
  padded = keras.layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs)

  # strided 3x3 convolution -> batch normalization -> ReLU6
  features = keras.layers.Conv2D(filters=n_filters, kernel_size=3, strides=2, padding='valid', use_bias=False, kernel_initializer='glorot_uniform')(padded)
  features = keras.layers.BatchNormalization()(features)
  return keras.layers.ReLU(6.)(features)

## The learner component

### Inverted Residual Block

In [45]:
def inverted_block(x, n_filters, alpha, expansion=6, strides=(1, 1)):
  """ Build an inverted residual block:
      (optional) 1x1 expansion conv -> BN -> ReLU6,
      3x3 depthwise conv -> BN -> ReLU6,
      1x1 linear pointwise conv -> BN,
      plus an identity shortcut when the block keeps channel count and resolution.

      x         : input tensor to the block; x.shape[3] is read as the channel count
      n_filters : number of output filters before width scaling
      alpha     : width multiplier; the pointwise convolution gets
                  int(n_filters * alpha) filters
      expansion : multiplier applied to the input channel count in the
                  expansion stage; the stage is skipped when expansion <= 1
      strides   : strides of the depthwise convolution, either a single int
                  or a pair of ints (tuple or list)

      Returns the output tensor of the block.
  """
  # normalize strides so that 1, [1, 1] and (1, 1) are treated alike; otherwise
  # a non-tuple value would silently miss the padding and shortcut branches below
  try:
    strides = tuple(strides)
  except TypeError:
    strides = (strides, strides)

  # save input for the residual connection
  shortcut = x

  # apply the width multiplier to the number of filters of the pointwise convolution
  filters = int(n_filters * alpha)

  # extract the number of input channels
  n_channels = int(x.shape[3])

  # dimensionality expansion (skipped when expansion <= 1)
  if expansion > 1:
    # a 1x1 convolution followed by batch normalization and ReLU6
    x = keras.layers.Conv2D(filters=expansion * n_channels, kernel_size=1, padding='same', use_bias=False, kernel_initializer='glorot_uniform')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.ReLU(6.)(x)

  # a stride-2 depthwise convolution is padded explicitly (one extra row at the
  # bottom, one extra column on the right) and then run with 'valid' padding
  if strides == (2, 2):
    x = keras.layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(x)
    padding = 'valid'
  else:
    padding = 'same'

  # apply depthwise convolution
  x = keras.layers.DepthwiseConv2D(kernel_size=3, strides=strides, padding=padding, use_bias=False, kernel_initializer='glorot_uniform')(x)
  x = keras.layers.BatchNormalization()(x)
  x = keras.layers.ReLU(6.)(x)

  # apply linear pointwise convolution (no activation after batch normalization)
  x = keras.layers.Conv2D(filters=filters, kernel_size=1, strides=1, padding='same', use_bias=False, kernel_initializer='glorot_uniform')(x)
  x = keras.layers.BatchNormalization()(x)

  # add the shortcut only when the channel count and the resolution are unchanged
  if n_channels == filters and strides == (1, 1):
    x = keras.layers.Add()([shortcut, x])

  return x

### Inverted Residual Group

In [46]:
def group(x, n_filters, n_blocks, alpha, expansion=6, strides=(2, 2)):
  """ Stack inverted residual blocks into one group.

      x         : input tensor to the group
      n_filters : number of filters passed to every block
      n_blocks  : number of blocks in the group
      alpha     : width multiplier passed to every block
      expansion : expansion multiplier passed to every block
      strides   : strides of the first block only; the remaining blocks
                  always use (1, 1)

      Returns the output tensor of the last block.
  """
  # the leading block uses the requested strides (possible feature map size
  # reduction); every block after it keeps the resolution
  per_block_strides = [strides] + [(1, 1)] * (n_blocks - 1)

  for block_strides in per_block_strides:
    x = inverted_block(x, n_filters, alpha, expansion, strides=block_strides)

  return x

### Learner component

In [47]:
def learner(x, alpha, expansion=6):
  """ Build the learner component: seven inverted residual groups followed
      by a 1x1 convolution with 1280 filters.

      x         : input tensor to the learner
      alpha     : width multiplier passed to every group
      expansion : multiplier to expand the number of filters; used by every
                  group except the first, which is built with expansion=1

      Returns the output tensor of the learner.
  """
  # (number of filters, number of blocks, expansion, strides of the first block)
  group_specs = (
    (16, 1, 1, (1, 1)),
    (24, 2, expansion, (2, 2)),
    (32, 3, expansion, (2, 2)),
    (64, 4, expansion, (2, 2)),
    (96, 3, expansion, (1, 1)),
    (160, 3, expansion, (2, 2)),
    (320, 1, expansion, (1, 1)),
  )

  for n_filters, n_blocks, factor, first_strides in group_specs:
    x = group(x, n_filters, n_blocks, alpha, expansion=factor, strides=first_strides)

  # final 1x1 convolution expanding to 1280 filters, then batch normalization and ReLU6
  head = keras.layers.Conv2D(filters=1280, kernel_size=1, use_bias=False, kernel_initializer='glorot_uniform')(x)
  head = keras.layers.BatchNormalization()(head)
  return keras.layers.ReLU(6.)(head)

## The task component

In [48]:
def task(x, n_classes):
  """ Build the task (classifier) component.

      x         : input tensor to the classifier
      n_classes : number of output classes

      Returns a tensor of softmax scores with n_classes entries per sample.
  """
  # average every feature map down to a single value -> (?, N)
  pooled = keras.layers.GlobalAveragePooling2D()(x)

  # fully connected softmax classifier
  classifier = keras.layers.Dense(units=n_classes, activation='softmax', kernel_initializer='glorot_uniform')
  return classifier(pooled)

## The MobileNet v2 model

In [49]:
# width multiplier (0 .. 1): values below 1 reduce the number of filters
alpha = 1
# multiplier used to expand the number of filters inside a block
expansion = 6
# number of output classes
classes = 1000

In [50]:
def build_mobilenet_v2(alpha=1, expansion=6, shape=(224,224,3), classes=1000):
  """ Assemble the MobileNet v2 model from its stem, learner and task components.

      alpha     : width multiplier forwarded to the stem and the learner
      expansion : expansion multiplier forwarded to the learner
      shape     : shape of the input tensor (without the batch dimension)
      classes   : number of output classes

      Returns a keras.Model named "MobileNet-v2".
  """
  inputs = keras.Input(shape=shape)

  # stem -> learner -> task
  features = learner(stem(inputs, alpha), alpha, expansion)
  outputs = task(features, classes)

  return keras.Model(inputs=inputs, outputs=outputs, name="MobileNet-v2")

In [51]:
# create the MobileNet v2 model from the hyperparameters set in the configuration cell above
# (previously the builder was called with its defaults, so editing that cell had no effect)
mobilenet_v2_model = build_mobilenet_v2(alpha=alpha, expansion=expansion, classes=classes)
# print a summary
mobilenet_v2_model.summary()

Model: "MobileNet-v2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_6 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 zero_padding2d_21 (ZeroPad  (None, 225, 225, 3)          0         ['input_6[0][0]']             
 ding2D)                                                                                          
                                                                                                  
 conv2d_81 (Conv2D)          (None, 112, 112, 32)         864       ['zero_padding2d_21[0][0]']   
                                                                                                  
 batch_normalization_134 (B  (None, 112, 112, 32)         128       ['conv2d_81[0][0]']