# ResNet50, MobileNet V1 and MobileNet V2 Implementation

In [45]:
import os
import tensorflow as tf
import numpy as np
import keras
import keras.backend as K
import keras.layers as KL
import keras.engine as KE
import keras.models as KM

## Utility Functions

In [46]:
class BatchNorm(KL.BatchNormalization):
    """
    Why we need batch normalization?
    Batch normalization is a technique to standardize the inputs
    to a neural network, applied to either the activations of a prior layer
    or inputs directly
    
    To prevent internal Covariance shift
    distribution can be different
    whitening: make input features uncorrelated and set variance as 1'
    """
    
    """
    But Batch normalization has a negative effect on training if batches are
    small so this layer can be frozen and functions as linear layer
    """
    def call(self, inputs):
        return super(self.__class__, self).call(inputs)
    

## ResNet Graph

In [47]:
def identity_block(input_tensor, kernel_size, filters, block, use_bias=True):
    """
    use_bias = True
    Weight can decide how fast the activation function will trigger
    but bias can delay the trigerring of the activation fucntion
    bias can help time-consuming and costly part, i.e. data collection
    
    If I can set the name, it it easier to retrieve the output to call
    Each layer's name
    """
    
    nb_filter1, nb_filter2, nb_filter3 = filters
    x = KL.Conv2D(nb_filter1, (1, 1), use_bias=use_bias)(input_tensor)
    x = BatchNorm()(x)
    x = KL.Activation('relu')(x)
    
    x = KL.Conv2D(nb_filter2,(kernel_size, kernel_size), use_bias=use_bias, padding='same')(x)
    x = BatchNorm()(x)
    x = KL.Activation('relu')(x)
    
    x = KL.Conv2D(nb_filter3, (1, 1), use_bias=use_bias)(x)
    x = BatchNorm()(x)

    
    x = KL.Add()([x, input_tensor])
    x = KL.Activation('relu')(x)
    
    return x

def conv_block(input_tensor, kernel_size, filters, block, 
               strides=(2,2), use_bias=True):
    
    nb_filter1, nb_filter2, nb_filter3 = filters
    
    x = KL.Conv2D(nb_filter1, (1, 1), use_bias=use_bias, 
                  strides=strides)(input_tensor)
    x = BatchNorm()(x)
    x = KL.Activation('relu')(x)
    
    x = KL.Conv2D(nb_filter2,(kernel_size, kernel_size),padding='same', 
                  use_bias=use_bias)(x)
    x = BatchNorm()(x)
    x = KL.Activation('relu')(x)
    
    x = KL.Conv2D(nb_filter3, (1, 1), use_bias=use_bias)(x)
    x = BatchNorm()(x)
    x = KL.Activation('relu')(x)
    
    shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, use_bias=use_bias)(input_tensor)
    shortcut = BatchNorm()(shortcut)
    
    x = KL.Add()([x, shortcut])
    x = KL.Activation('relu')(x)
    
    return x

def resnet_graph(architecture, input_image=(256,256,3)):
    """
    Implementation of the popular ResNet50 architecture
    CONV2D - Batchnorm - ReLU - MaxPool - CONV - ID*2 - CONV - ID*3 - CONV
    - ID*5 - CONV - ID*2 - AVGPOOL-flatten-FC
    """
    
    assert architecture in ["resnet50", "resnet101"]
    
    X_input = keras.Input(input_image)
    # Stage 1
    x = KL.ZeroPadding2D((3, 3))(X_input)
    x = KL.Conv2D(64, (7, 7), strides=(2, 2), use_bias=True)(x)
    x = KL.Activation('relu')(x)
    x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)
    
    # Stage 2
    x = conv_block(x, 3, [64, 64, 256], block='a', strides=(1,1))
    x = identity_block(x, 3, [64, 64, 256], block='b')
    x = identity_block(x, 3, [64, 64, 256], block='c')
    
    # Stage 3
    x = conv_block(x, 3, [128, 128, 512], block='a')
    x = identity_block(x, 3, [128, 128, 512], block='b')
    x = identity_block(x, 3, [128, 128, 512], block='c')
    x = identity_block(x, 3, [128, 128, 512], block='d')
    
    # Stage 4
    x = conv_block(x, 3, [256, 256, 1024], block='a')
    block_count = {"resnet50": 5, "resnet101": 22}[architecture]
    
    for i in range(block_count):
        x = identity_block(x, 3, [256, 256, 1024], block=chr(98+i))
    
    # Stage 5
    x = conv_block(x, 3, [512, 512, 2048], block='a', strides=(1,1))
    x = identity_block(x, 3, [512, 512, 2048], block='b')
    x = identity_block(x, 3, [512, 512, 2048], block='c')
    
    # AVGPOOL
    x = KL.AveragePooling2D()(x)
    
    # Output Layer
    x = KL.Flatten()(x)
    x = KL.Dense(4, activation = 'softmax')(x)
    

    # Create model
    model = KM.Model(inputs = X_input, outputs = x, name=architecture)
    
    return model

## ResNet50 model compile and Summary

In [6]:
resnetModel = resnet_graph("resnet50")
sgd = keras.optimizers.SGD(lr=1e-6)
resnetModel.compile(optimizer=sgd, loss="categorical_crossentropy", metrics=['accuracy'])
resnetModel.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
zero_padding2d_2 (ZeroPadding2D (None, 262, 262, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_54 (Conv2D)              (None, 128, 128, 64) 9472        zero_padding2d_2[0][0]           
__________________________________________________________________________________________________
activation_54 (Activation)      (None, 128, 128, 64) 0           conv2d_54[0][0]                  
__________________________________________________________________________________________________
max_poolin

## MobileNet V1 Graph

In [93]:
"""
MobileNet v1 models for Keras.
# Reference
- [MobineNets: Efficient Convolutional Neural Networks for Mobile Vision Applications]
   (https://arxiv.org/abs/1704.04861)
# Code modified from: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/mobilenet.py
"""

def _conv_block(input_tensor, filters, alpha, kernel_size=3, strides=(1, 1), block_id=1, use_bias=True):
    """
    Arguments
    
    alpha: controls the width of the network. - If `alpha` < 1.0,
      proportionally decreases the number of filters in each layer. - If
      `alpha` > 1.0, proportionally increases the number of filters in each
      layer. - If `alpha` = 1, default number of filters from the paper are
      used at each layer.
      
    4D tensor with shape: `(samples, channels, rows, cols)` if
      data_format='channels_first'
    or 4D tensor with shape: `(samples, rows, cols, channels)` if
      data_format='channels_last'. # Output shape
    
    """
    print(input_tensor.shape)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    
    filters = int(filters * alpha)
    x = KL.Conv2D(filters, (kernel_size, kernel_size), 
                  padding='same',
                  use_bias=False, 
                  strides=strides,
                  name='conv{}'.format(block_id))(input_tensor)
    x = BatchNorm(axis=channel_axis, name='conv{}_bn'.format(block_id))(x)
    x = KL.Activation('relu', name='conv{}_relu'.format(block_id))(x)
    return x

In [94]:
def _depth_conv_block(input_tensor, pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1,1), block_id=1):
    """
    Arguments
    pointwise_conv_filters: Integer, the dimensionality o the output space
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)
    
    # Depthwise
    x = KL.DepthwiseConv2D((3, 3),
                           padding='same',
                           depth_multiplier=depth_multiplier,
                           strides=strides,
                           use_bias=False,
                           name='conv_depth{}'.format(block_id))(input_tensor)
    x = BatchNorm(axis=channel_axis, name='conv_depth{}_bn'.format(block_id))(x)
    x = KL.Activation('relu', name='conv_depth{}_relu'.format(block_id))(x)
    
    # Pointwise
    x = KL.Conv2D(pointwise_conv_filters, (1,1), padding='same',
                  use_bias=False,
                  strides=(1,1),
                  name='conv_point{}'.format(block_id))(x)
    x = BatchNorm(axis=channel_axis, name='conv_point{}_bn'.format(block_id))(x)
    x = KL.Activation('relu', name='conv_point{}_relu'.format(block_id))(x)
    
    return x

In [95]:
def mobilenetv1_graph(architecture, input_image=(512,512,3), alpha=1.0, depth_multiplier=1):
    """
      Args:
    input_shape: Optional shape tuple, only to be specified if `include_top`
      is False (otherwise the input shape has to be `(224, 224, 3)` (with
      `channels_last` data format) or (3, 224, 224) (with `channels_first`
      data format). It should have exactly 3 inputs channels, and width and
      height should be no smaller than 32. E.g. `(200, 200, 3)` would be one
      valid value. Default to `None`.
      `input_shape` will be ignored if the `input_tensor` is provided.
    alpha: Controls the width of the network. This is known as the width
      multiplier in the MobileNet paper. - If `alpha` < 1.0, proportionally
      decreases the number of filters in each layer. - If `alpha` > 1.0,
      proportionally increases the number of filters in each layer. - If
      `alpha` = 1, default number of filters from the paper are used at each
      layer. Default to 1.0.
    depth_multiplier: Depth multiplier for depthwise convolution. This is
      called the resolution multiplier in the MobileNet paper. Default to 1.0.
    dropout: Dropout rate. Default to 0.001.
    include_top: Boolean, whether to include the fully-connected layer at the
      top of the network. Default to `True`.
    weights: One of `None` (random initialization), 'imagenet' (pre-training
      on ImageNet), or the path to the weights file to be loaded. Default to
      `imagenet`.
    input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to
      use as image input for the model. `input_tensor` is useful for sharing
      inputs between multiple different networks. Default to None.
    pooling: Optional pooling mode for feature extraction when `include_top`
      is `False`.
      - `None` (default) means that the output of the model will be
          the 4D tensor output of the last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a 2D tensor.
      - `max` means that global max pooling will be applied.
    classes: Optional number of classes to classify images into, only to be
      specified if `include_top` is True, and if no `weights` argument is
      specified. Defaults to 1000.
    classifier_activation: A `str` or callable. The activation function to use
      on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
    **kwargs: For backwards compatibility only.
  Returns:
    A `keras.Model` instance.
  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
    assert architecture in ["mobilenetv1"]
    X_input = keras.Input(input_image)
    x = _conv_block(X_input, 32, alpha, strides=(2,2), block_id=0)
    x = _depth_conv_block(x, 64, alpha, strides=(1,1), block_id=1)
    
    x = _depth_conv_block(x, 128, alpha, depth_multiplier, strides=(2,2), block_id=2)
    x = _depth_conv_block(x, 128, alpha, depth_multiplier, strides=(1,1), block_id=3)
    
    x = _depth_conv_block(x, 256, alpha, depth_multiplier, strides=(2,2), block_id=4)
    x = _depth_conv_block(x, 256, alpha, depth_multiplier, strides=(1,1), block_id=5)
    
    x = _depth_conv_block(x, 512, alpha, depth_multiplier, strides=(2,2), block_id=6)
    x = _depth_conv_block(x, 512, alpha, depth_multiplier, strides=(1,1), block_id=7)
    x = _depth_conv_block(x, 512, alpha, depth_multiplier, strides=(1,1), block_id=8)
    x = _depth_conv_block(x, 512, alpha, depth_multiplier, strides=(1,1), block_id=9)
    x = _depth_conv_block(x, 512, alpha, depth_multiplier, strides=(1,1), block_id=10)
    x = _depth_conv_block(x, 512, alpha, depth_multiplier, strides=(1,1), block_id=11)
    
    x = _depth_conv_block(x, 1024, alpha, depth_multiplier, strides=(2,2), block_id=12)
    x = _depth_conv_block(x, 1024, alpha, depth_multiplier, strides=(1,1), block_id=13)
    
    shape = (int(1024*alpha), 1, 1) if K.image_data_format() == 'channel_first' else (1,1,int(1024*alpha))
    # pooling and FC and activation function, After extracting features
    # Output Layer
    x = KL.GlobalAveragePooling2D()(x)
    x = KL.Reshape(shape, name='reshape_1')(x)
    x = KL.Dropout(1e-6, name='dropout')(x)
    x = KL.Conv2D(4, (1,1), padding='same', name='conv_preds')(x)
    x = KL.Reshape((4,), name='reshape_2')(x)
    x = KL.Activation('softmax', name='predictions')(x)
    
    model = KM.Model(inputs = X_input, outputs = x, name=architecture)
    
    return model

In [96]:
mobilenetv1Model = mobilenetv1_graph("mobilenetv1")
sgd = keras.optimizers.SGD(lr=1e-6)
mobilenetv1Model.compile(optimizer=sgd, loss="categorical_crossentropy", metrics=['accuracy'])
mobilenetv1Model.summary()

(?, 512, 512, 3)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_24 (InputLayer)        (None, 512, 512, 3)       0         
_________________________________________________________________
conv0 (Conv2D)               (None, 256, 256, 32)      864       
_________________________________________________________________
conv0_bn (BatchNorm)         (None, 256, 256, 32)      128       
_________________________________________________________________
conv0_relu (Activation)      (None, 256, 256, 32)      0         
_________________________________________________________________
conv_depth1 (DepthwiseConv2D (None, 256, 256, 32)      288       
_________________________________________________________________
conv_depth1_bn (BatchNorm)   (None, 256, 256, 32)      128       
_________________________________________________________________
conv_depth1_relu (Activation (None, 256, 256, 32)      0   

## MobileNet V2 Graph

In [162]:
"""
MobileNet v2 models for Keras.
# Reference
- [MobileNetV2: Inverted Residuals and Linear Bottlenecks]
   (https://arxiv.org/abs/1801.04381)
# Code modified from: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/applications/mobilenet.py
"""
def _inverted_res_block(inputs, filters, kernel, stride, expansion, block_id, alpha=1.0):
    channel_axis = 1 if K.image_data_format() == 'channel_first' else -1
    
    in_channel = K.int_shape(inputs)[channel_axis]
    pointwise_conv_filters = int(filters * alpha)
    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
    
    x = inputs
    x = _conv_block(inputs, expansion * in_channel, alpha, 1, (1, 1), block_id=block_id)

    x = KL.DepthwiseConv2D(kernel, strides=stride, depth_multiplier=1, padding='same', name='conv_dw_{}'.format(block_id))(x)
    x = BatchNorm(axis=channel_axis, name='conv_dw_{}_bn'.format(block_id))(x)
    x = KL.Activation('relu', name='conv_dw{}_relu'.format(block_id))(x)
    
    x = KL.Conv2D(pointwise_filters, (1, 1), strides=(1, 1), padding='same', name='conv_pw_{}'.format(block_id))(x)
    x = BatchNorm(axis=channel_axis, name='conv_pw_{}_bn'.format(block_id))(x)
    
    if stride == (1,1) and pointwise_filters==filters and x.shape[-1] == inputs.shape[-1]: 
        x = KL.add([x, inputs], name='res{}'.format(block_id))
    return x
        

In [163]:
def _make_divisible(v, divisor, min_value=None):
    if min_value is None: min_value = divisor
    new_v = max(min_value, int(v+divisor/2) // divisor*divisor)
    
    if new_v < 0.9 * v: new_v += divisor
    
    return new_v

In [166]:
def mobilenetv2_graph(architecture, inputs=(224,224,3), alpha=1.0):
    assert architecture in ["mobilenetv2"]
    X_input = keras.Input(inputs)
    
    x = _conv_block(X_input, 32, alpha, strides=(2,2), block_id=0)
    
    x = _inverted_res_block(x, 16, 3, (1,1), 1, block_id=1)
    
    x = _inverted_res_block(x, 24, 3, (2,2), 6, block_id=2)
    x = _inverted_res_block(x, 24, 3, (1,1), 6, block_id=3)
    
    x = _inverted_res_block(x, 32, 3, (2,2), 6, block_id=4)
    x = _inverted_res_block(x, 32, 3, (1,1), 6, block_id=5)
    x = _inverted_res_block(x, 32, 3, (1,1), 6, block_id=6)
    
    x = _inverted_res_block(x, 64, 3, (2,2), 6, block_id=7)
    x = _inverted_res_block(x, 64, 3, (1,1), 6, block_id=8)
    x = _inverted_res_block(x, 64, 3, (1,1), 6, block_id=9)
    x = _inverted_res_block(x, 64, 3, (1,1), 6, block_id=10)
    
    x = _inverted_res_block(x, 96, 3, (1,1), 6, block_id=11)
    x = _inverted_res_block(x, 96, 3, (1,1), 6, block_id=12)
    x = _inverted_res_block(x, 96, 3, (1,1), 6, block_id=13)
    
    x = _inverted_res_block(x, 160, 3, (2,2), 6, block_id=14)
    x = _inverted_res_block(x, 160, 3, (1,1), 6, block_id=15)
    x = _inverted_res_block(x, 160, 3, (1,1), 6, block_id=16)
    
    x = _inverted_res_block(x, 320, 3, (1,1), 6, block_id=17)
    
    last_block_filters = 1280
    x = KL.Conv2D(last_block_filters, 1, use_bias=False, name='last_conv_1')(x)
    x = BatchNorm(axis=-1, name='last_bn_1')(x)
    x = KL.Activation('relu', name='last_relu')(x)
    x = KL.GlobalAveragePooling2D()(x)
    x = KL.Dense(4, activation='softmax', name='predictions')(x)
    
    model = KM.Model(inputs=X_input, output=x, name=architecture)
    
    return model

In [169]:
mobilenetv2Model = mobilenetv2_graph("mobilenetv2")
sgd = keras.optimizers.SGD(lr=1e-6)
mobilenetv2Model.compile(optimizer=sgd, loss="categorical_crossentropy", metrics=['accuracy'])
mobilenetv2Model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_42 (InputLayer)           (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv0 (Conv2D)                  (None, 112, 112, 32) 864         input_42[0][0]                   
__________________________________________________________________________________________________
conv0_bn (BatchNorm)            (None, 112, 112, 32) 128         conv0[0][0]                      
__________________________________________________________________________________________________
conv0_relu (Activation)         (None, 112, 112, 32) 0           conv0_bn[0][0]                   
__________________________________________________________________________________________________
conv1 (Con

