# MobileNet V2 in TensorFlow

An implementation based on the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381.pdf).

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers as ly



In [2]:
def bn_act(x, bn=True, act=True):
    """Optionally apply batch normalization, then optionally a ReLU6 activation.

    Args:
        x: Value fed to the Keras layers.
        bn: When truthy, apply ``BatchNormalization`` (epsilon=1e-3, momentum=0.999).
        act: When truthy, apply ``Activation(tf.nn.relu6)`` after the optional
            batch normalization.

    Returns:
        The transformed value, or ``x`` itself when both flags are falsy.
    """
    out = x
    if bn:
        normalize = ly.BatchNormalization(momentum=0.999, epsilon=1e-3)
        out = normalize(out)
    if act:
        relu6 = ly.Activation(tf.nn.relu6)
        out = relu6(out)
    return out

In [3]:
def MBConv(x, n_in, n_out, expansion, ks=3, strides=1, dropout=0.1):
    """Inverted-residual bottleneck: 1x1 expand -> depthwise -> linear 1x1 project.

    Args:
        x: Input tensor. ``n_in`` is trusted to be its channel count; it is not
            checked against ``x`` here.
        n_in: Number of input channels; sizes the expansion conv and gates the
            residual connection.
        n_out: Number of filters of the final 1x1 projection convolution.
        expansion: Channel multiplier of the pointwise expansion. When it is 1
            the expansion convolution is skipped entirely.
        ks: Depthwise kernel size.
        strides: Depthwise stride.
        dropout: Currently unused; kept so existing callers keep working.

    Returns:
        The block output. When ``strides == 1`` and ``n_in == n_out`` the block
        input is added back to the projected output (residual connection).
    """
    # The identity shortcut is only shape-compatible when neither the spatial
    # size (stride 1) nor the channel count (n_in == n_out) changes.
    use_residual = strides == 1 and n_in == n_out
    block_input = x

    if expansion != 1:
        # Pointwise expansion.
        x = ly.Conv2D(expansion * n_in, kernel_size=1, padding='same', use_bias=False,
                      activation=None)(x)
        x = bn_act(x)

    # Depthwise convolution. For stride 2 the input is zero-padded explicitly
    # and the convolution runs with 'valid' padding.
    if strides == 2:
        x = ly.ZeroPadding2D(padding=(ks - 1) // 2)(x)
    x = ly.DepthwiseConv2D(kernel_size=ks, strides=strides, activation=None, use_bias=False,
                           padding='same' if strides == 1 else 'valid')(x)
    x = bn_act(x)

    # Linear projection: batch norm only, no activation.
    x = ly.Conv2D(n_out, (1, 1), padding='same', activation=None, use_bias=False)(x)
    x = bn_act(x, act=False)

    if use_residual:
        x = ly.Add()([block_input, x])
    return x

In [4]:
# Per-stage configuration read by MobileNetV2.
# widths[0] is the stem's filter count; stage i maps widths[i] -> widths[i + 1] channels.
# The last entry (1280) is the width of the final 1x1 convolution before pooling.
widths = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
# Number of MBConv blocks in each stage.
depths = [1, 2, 3, 4, 3, 3, 1]
# Stride of the first block of each stage; later blocks use MBConv's default stride.
strides = [1, 2, 2, 2, 1, 2, 1]

In [5]:
def MobileNetV2(n_classes=1000, input_shape=(224, 224, 3)):
    """Assemble the MobileNetV2 Keras model from the module-level stage config.

    The network is a strided 3x3 stem convolution, one stage of ``MBConv``
    blocks per entry of ``depths``/``strides``, a final 1x1 convolution,
    global average pooling and a dense classification layer.

    Args:
        n_classes: Number of units of the final ``Dense`` layer.
        input_shape: Shape of one input sample, passed to ``keras.Input``.
            Defaults to ``(224, 224, 3)``.

    Returns:
        A ``keras.Model`` named ``"mobilenetv2"``. The ``Dense`` head is built
        without an ``activation`` argument.
    """
    inputs = keras.Input(shape=input_shape)
    stem = ly.Conv2D(widths[0], (3, 3), strides=(2, 2), padding='same', use_bias=False)(inputs)
    x = bn_act(stem)

    # Number of stages comes from the config lists rather than a literal.
    for i, (depth, stride) in enumerate(zip(depths, strides)):
        # Only the first stage skips the expansion (expansion factor 1).
        expansion = 1 if i == 0 else 6
        w_in, w_out = widths[i], widths[i + 1]
        # The first block of a stage changes width and applies the stage stride.
        x = MBConv(x, w_in, w_out, expansion=expansion, strides=stride)
        # The remaining blocks keep the width and use MBConv's default stride.
        for _ in range(depth - 1):
            x = MBConv(x, w_out, w_out, expansion=expansion)

    # Final 1x1 convolution; its width is the last entry of ``widths``.
    x = ly.Conv2D(widths[-1], kernel_size=1, use_bias=False)(x)
    x = bn_act(x)
    x = ly.GlobalAveragePooling2D()(x)
    x = ly.Dense(n_classes)(x)
    return keras.Model(inputs=inputs, outputs=x, name="mobilenetv2")

In [6]:
# Build the model with MobileNetV2's default arguments.
mobilenet = MobileNetV2()

Metal device set to: Apple M1


In [7]:
%%time
# Smoke test: run one random 224x224x3 sample through the model and show the output shape.
mobilenet(np.random.random((1, 224, 224, 3))).shape 

CPU times: user 96.9 ms, sys: 29 ms, total: 126 ms
Wall time: 110 ms


TensorShape([1, 1000])

In [8]:
def fmat(n):
    """Format a number as millions with two decimals and an ``M`` suffix.

    For example, ``3500000`` becomes ``"3.50M"``.
    """
    millions = n / 1e6
    return f"{millions:.2f}M"

In [9]:
def params(model, f = True):
    """Count the elements across ``model.trainable_variables``.

    Args:
        model: Object exposing ``trainable_variables``, each with a ``shape``.
        f: When truthy, return the count formatted by ``fmat``; otherwise
            return it as a plain ``int``.

    Returns:
        The formatted string from ``fmat`` or the integer count.
    """
    sizes = [np.prod(variable.shape) for variable in model.trainable_variables]
    total = int(np.sum(sizes))
    if f:
        return fmat(total)
    return total

In [10]:
# Trainable parameter count, formatted in millions by params/fmat.
print(params(mobilenet))

3.50M


In [11]:
# Print the layer-by-layer summary (layer type, output shape, parameter count).
mobilenet.summary()

Model: "mobilenetv2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 112, 112, 32)      864       
                                                                 
 batch_normalization (BatchN  (None, 112, 112, 32)     128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 112, 112, 32)      0         
                                                                 
 depthwise_conv2d (Depthwise  (None, 112, 112, 32)     288       
 Conv2D)                                                         
                                                                 
 batch_normalization_1 (Batc  (None, 112, 112, 32)     