<a href="https://colab.research.google.com/github/CelikAbdullah/deep-learning-notebooks/blob/main/Computer%20Vision/models/ShuffleNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
from tensorflow import keras
from tensorflow.keras import backend as K

# ShuffleNet

## The stem component

In [38]:
def stem(inputs):
  ''' Build the stem component of ShuffleNet.
      inputs : input image (tensor)

      Returns the tensor produced by running the input through, in order:
      strided 3x3 convolution -> batch normalization -> ReLU -> max pooling.
  '''
  # the stem is a plain sequential stack, so describe it as a pipeline
  pipeline = (
      # 24 filters, 3x3 kernel, stride 2; no bias term is used
      keras.layers.Conv2D(filters=24, kernel_size=3, strides=2, padding='same', use_bias=False),
      keras.layers.BatchNormalization(),
      keras.layers.ReLU(),
      # 3x3 pooling window, stride 2
      keras.layers.MaxPooling2D(pool_size=3, strides=2, padding='same'),
  )

  # thread the tensor through each layer in turn
  features = inputs
  for layer in pipeline:
    features = layer(features)

  return features

## The learner component

### Channel Shuffle Layer

In [39]:
def channel_shuffle(x, n_partitions):
  ''' Build the channel shuffle layer.
      x            : input tensor with four axes; the last one holds the channels
      n_partitions : number of groups to partition feature maps (channels) into.

      The channel axis is viewed as (n_partitions, channels per group), those
      two axes are swapped, and the result is flattened back to the original
      four-axis shape.
  '''
  # static shape of the input; the leading (batch) entry is not needed
  _, rows, cols, channels = x.shape

  # number of channels held by each group
  per_group = channels // n_partitions

  # target shapes for the two reshapes (-1 leaves the batch axis free)
  grouped_shape  = [-1, rows, cols, n_partitions, per_group]
  restored_shape = [-1, rows, cols, channels]

  # 1) split the channel axis into (group, channel-within-group)
  x = keras.layers.Lambda(lambda t: K.reshape(t, grouped_shape))(x)
  # 2) swap the two trailing axes (i.e., 3, 4 => 4, 3)
  x = keras.layers.Lambda(lambda t: K.permute_dimensions(t, (0, 1, 2, 4, 3)))(x)
  # 3) merge them back into a single channel axis
  x = keras.layers.Lambda(lambda t: K.reshape(t, restored_shape))(x)

  return x

### Pointwise Group Convolution

In [40]:
def pw_group_conv(x, n_partitions, n_filters):
  ''' Build a pointwise group convolution
      x            : input tensor (channels on axis 3)
      n_partitions : number of groups to partition feature maps (channels) into.
      n_filters    : number of filters requested in total; each group gets
                     n_filters / n_partitions of them, rounded to the nearest
                     integer, so the output has
                     n_partitions * round(n_filters / n_partitions) channels.

      Returns the batch-normalized concatenation of the per-group 1x1 convolutions.
  '''

  # calculate the number of input filters (channels)
  in_filters = x.shape[3]
  # calculate the number of input filters (channels) per group
  # NOTE: floor division - channels beyond grp_in_filters * n_partitions are
  # not covered by any of the slices below.
  grp_in_filters  = in_filters // n_partitions
  # calculate the number of output filters per group (rounded half up)
  grp_out_filters = int(n_filters / n_partitions + 0.5)
  # perform convolution across each channel group
  groups = []

  for i in range(n_partitions):
    # channel range [start, stop) owned by group i
    start = grp_in_filters * i
    stop  = grp_in_filters * (i + 1)

    # slice the input across channel group.
    # The bounds are bound as default arguments on purpose: Keras invokes a
    # Lambda layer's function again every time the model is executed, which
    # happens after this loop has finished. A closure over the loop variable
    # would then only see its final value and every group would silently
    # slice the LAST channel group.
    group = keras.layers.Lambda(lambda z, start=start, stop=stop: z[:, :, :, start:stop])(x)

    # perform convolution on channel group
    conv = keras.layers.Conv2D(filters=grp_out_filters, kernel_size=1, padding='same', strides=1, use_bias=False)(group)
    # maintain the point-wise group convolutions in a list
    groups.append(conv)

  if len(groups) > 1:
    # Concatenate the outputs of the group pointwise convolutions together
    x = keras.layers.Concatenate()(groups)
  else:
    # a single group needs no concatenation
    x = groups[0]

  # Do batch normalization of the concatenated filters (feature maps)
  x = keras.layers.BatchNormalization()(x)

  return x

### Shuffle Block

In [41]:
def shuffle_block(x, n_partitions, n_filters, reduction):
  ''' Build a shuffle block (stride 1, identity shortcut).
      x           : input to the block
      n_partitions: number of groups to partition feature maps (channels) into.
      n_filters   : number of filters
      reduction   : dimensionality reduction factor (e.g, 0.25)

      Returns ReLU(x + branch(x)), where the branch is: pointwise group conv
      (reduced) -> ReLU -> channel shuffle -> depthwise 3x3 conv -> batch norm
      -> pointwise group conv (restored).
  '''

  # keep the untouched block input; it is added back at the exit
  residual = x

  # entry pointwise group convolution, reduced to int(reduction * n_filters) filters
  squeezed_filters = int(reduction * n_filters)
  branch = pw_group_conv(x, n_partitions, squeezed_filters)
  branch = keras.layers.ReLU()(branch)

  # mix the channels of the different groups
  branch = channel_shuffle(branch, n_partitions)

  # depthwise 3x3 convolution (stride 1) followed by batch normalization
  depthwise = keras.layers.DepthwiseConv2D(kernel_size=(3, 3), strides=1, padding='same', use_bias=False)
  branch = depthwise(branch)
  branch = keras.layers.BatchNormalization()(branch)

  # exit pointwise group convolution, back up to n_filters filters
  branch = pw_group_conv(branch, n_partitions, n_filters)

  # merge with the identity shortcut (input added to output), then activate
  merged = keras.layers.Add()([residual, branch])
  return keras.layers.ReLU()(merged)

### Strided Shuffle Block

In [42]:
def strided_shuffle_block(x, n_partitions, n_filters, reduction):
  ''' Build a strided shuffle block (stride 2, pooled shortcut).
      x           : input to the block
      n_partitions: number of groups to partition feature maps (channels) into.
      n_filters   : number of filters
      reduction   : dimensionality reduction factor (e.g, 0.25)

      Returns ReLU(concat(avg_pool(x), branch(x))), where the branch is:
      pointwise group conv (reduced) -> ReLU -> channel shuffle -> strided
      depthwise 3x3 conv -> batch norm -> pointwise group conv (restored).
  '''

  # shortcut path: 3x3 average pooling with stride 2 on the block input
  pooled_shortcut = keras.layers.AveragePooling2D(pool_size=3, strides=2, padding='same')(x)

  # The shortcut is concatenated (not added) at the exit, so the convolution
  # branch is built with n_filters minus the number of input channels.
  branch_filters = n_filters - int(x.shape[3])

  # entry pointwise group convolution, reduced to int(reduction * branch_filters) filters
  branch = pw_group_conv(x, n_partitions, int(reduction * branch_filters))
  branch = keras.layers.ReLU()(branch)

  # mix the channels of the different groups
  branch = channel_shuffle(branch, n_partitions)

  # depthwise 3x3 convolution with stride 2, followed by batch normalization
  branch = keras.layers.DepthwiseConv2D(kernel_size=3, strides=2, padding='same', use_bias=False)(branch)
  branch = keras.layers.BatchNormalization()(branch)

  # exit pointwise group convolution, back up to branch_filters filters
  branch = pw_group_conv(branch, n_partitions, branch_filters)

  # stack the pooled shortcut and the branch along the channel axis, then activate
  merged = keras.layers.Concatenate()([pooled_shortcut, branch])
  return keras.layers.ReLU()(merged)

### Shuffle Group

In [43]:
def group(x, n_partitions, n_blocks, n_filters, reduction):
  ''' Build a shuffle group: one strided shuffle block followed by
      (n_blocks - 1) regular shuffle blocks.
      x            : input to the group
      n_partitions : number of groups to partition feature maps (channels) into.
      n_blocks     : number of shuffle blocks for this group
      n_filters    : number of output filters
      reduction    : dimensionality reduction
  '''

  # the group always opens with the strided variant
  out = strided_shuffle_block(x, n_partitions, n_filters, reduction)

  # blocks 2..n_blocks are regular shuffle blocks
  for _ in range(1, n_blocks):
    out = shuffle_block(out, n_partitions, n_filters, reduction)

  return out

### Learner component

In [44]:
def learner(x, groups, n_partitions, filters, reduction):
  ''' Build the learner component of ShuffleNet.
      x            : input to the learner
      groups       : number of shuffle blocks per shuffle group
      n_partitions : number of groups to partition feature maps (channels) into.
      filters      : number of filters per shuffle group; the first entry is
                     skipped here, so it must hold one more entry than groups
      reduction    : dimensionality reduction on entry to a shuffle block
  '''

  # filters carries one leading entry that the learner does not consume
  assert len(groups) == len(filters)-1

  # stack the shuffle groups; group number idx uses filters[idx + 1]
  for idx, n_blocks in enumerate(groups):
    x = group(x, n_partitions, n_blocks, filters[idx + 1], reduction)

  return x

## The task component

In [45]:
def task(x, n_classes):
  ''' Build the task (classifier) component.
      x         : input to the classifier
      n_classes : number of output classes

      Returns the output of a softmax-activated dense layer with n_classes units.
  '''
  # collapse every feature map to its mean value, giving one number per
  # feature map (a 1D vector per sample)
  pooled = keras.layers.GlobalAveragePooling2D()(x)

  # fully connected softmax layer with one unit per class
  classifier = keras.layers.Dense(units=n_classes, activation='softmax')
  return classifier(pooled)

## The ShuffleNet model

In [46]:
# how many groups the filters (channels) are partitioned into
n_partitions = 2

# fraction of the filters kept by the entry convolution of a shuffle block
reduction = 0.25

# how many shuffle blocks each of the three shuffle groups contains
groups = [4, 8, 4]

# filter counts keyed by the number of channel partitions:
# the first entry is the stem's output filters, the remaining three are the
# output filters of the successive shuffle groups
filters = {
    1: [24, 144, 288,  576],
    2: [24, 200, 400,  800],
    3: [24, 240, 480,  960],
    4: [24, 272, 544, 1088],
    8: [24, 384, 768, 1536],
}

In [47]:
def build_shufflenet(groups, filters, reduction=0.25, n_partitions = 2, shape=(224, 224, 3), classes=1000):
  ''' Assemble the ShuffleNet model from its stem, learner and task components.
      groups       : number of shuffle blocks per shuffle group
      filters      : mapping from number of partitions to the list of filters;
                     the entry for n_partitions is handed to the learner
      reduction    : dimensionality reduction on entry to a shuffle block
      n_partitions : number of groups to partition feature maps (channels) into.
      shape        : shape of the input image (without the batch axis)
      classes      : number of output classes

      Returns a keras.Model named "ShuffleNet".
  '''
  # symbolic input tensor
  image = keras.Input(shape=shape)

  # stem -> learner -> task
  features = stem(image)
  features = learner(features, groups, n_partitions, filters[n_partitions], reduction)
  predictions = task(features, classes)

  return keras.Model(inputs=image, outputs=predictions, name="ShuffleNet")

In [48]:
# create a ShuffleNet model from the settings of the configuration cell.
# reduction and n_partitions are forwarded explicitly: build_shufflenet has
# its own defaults for both, so without this, editing the configuration
# values would have no effect on the model that is built.
shufflenet_model = build_shufflenet(groups, filters, reduction=reduction, n_partitions=n_partitions)
# print a summary
shufflenet_model.summary()

Model: "ShuffleNet"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv2d_71 (Conv2D)          (None, 112, 112, 24)         648       ['input_4[0][0]']             
                                                                                                  
 batch_normalization_53 (Ba  (None, 112, 112, 24)         96        ['conv2d_71[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 re_lu_37 (ReLU)             (None, 112, 112, 24)         0         ['batch_normalization