Welcome to the Jupyter notebook for MobileNetV2 in TensorFlow<br>
* I have kept this code modular and all in one notebook<br>
* I hope this will make it easy to swap datasets and preprocessing quickly<br>
* For this network it is very important to use TF 1.x in Colab<br>
* Because of this generality, the architecture may not be the best for your use case<br>
* Modifying the architecture is a matter of tweaking a few hyperparameters (I'm not saying that finding the best tweaks is easy)
* Using MobileNetV2 is a bit of overkill for such an easy task, but...
* This repo will act as a starting point for more complex datasets


In [4]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


* The first step is always preprocessing the dataset; here I'm using [EMNIST](https://www.tensorflow.org/datasets/catalog/emnist)
* I have to add a channel dimension because the dataset is monochrome.
* This preprocessing will be the same for Fashion-MNIST and MNIST
* But you might not need this step for RGB datasets (e.g. CIFAR)

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, CSVLogger, LearningRateScheduler
from datetime import datetime
import emnist as em
from tensorflow.keras.utils import to_categorical

# set meta params
batch_size = 256     # mini-batch size handed to datagen.flow() during training
nb_classes = 27      # width of the one-hot label vectors (and of the softmax output)
nb_epoch   = 5       # NOTE(review): not referenced by the visible cells; the training call hard-codes its epoch count
nb_data    = 28*28   # pixels per image; NOTE(review): also unreferenced in the visible cells

# set meta info
log_dir         = '/content/mobilenetV2-cifar/train_log/mobilenet_v2-like_log'
dataset_dir     = '/content/mobilenetV2-cifar/datasets/dataset_norm'  # NOTE(review): unused now that data comes from the emnist package
model_name      = 'mobilenet_v2-like__' + datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
model_arch_path = os.path.join(log_dir, (model_name + '_arch.png'))
model_cp_path   = os.path.join(log_dir, (model_name + '_checkpoint.h5'))
model_csv_path  = os.path.join(log_dir, (model_name + '_csv.csv'))

# The architecture plot, checkpoint and CSV log are all written inside log_dir,
# so make sure it exists before anything tries to open a file there.
os.makedirs(log_dir, exist_ok=True)

# load the EMNIST 'letters' split
(trainX, trainY) = em.extract_training_samples('letters')
(testX, testY) = em.extract_test_samples('letters')

print("Shape of training set before concatenate :",trainX.shape)
# reshape dataset to have a single (trailing) channel axis: (N, 28, 28, 1)
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
testX = testX.reshape((testX.shape[0], 28, 28, 1))

# one hot encode target values; the width is pinned to nb_classes so the train
# and test label matrices always agree with each other and with the model
# output, instead of depending on the largest label present in each split
trainY = to_categorical(trainY, num_classes=nb_classes)
testY = to_categorical(testY, num_classes=nb_classes)

# convert to float and normalize to range 0-1
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0

# names used by the model-building / training cells
X_train, y_train, X_test, y_test = trainX, trainY, testX, testY

# data augmentation: random shifts of up to 20% in each direction plus
# random horizontal flips
# NOTE(review): horizontal flips mirror the glyphs, which may hurt on a
# letters dataset - confirm this is intended.
datagen = ImageDataGenerator(
        featurewise_center=False,
        featurewise_std_normalization=False,
        rotation_range=0.0,
        width_shift_range=0.2,
        height_shift_range=0.2,
        vertical_flip=False,
        horizontal_flip=True)
# NOTE(review): fit() computes dataset statistics for the featurewise options,
# which are all disabled above - presumably kept so they can be re-enabled
# without further changes.
datagen.fit(X_train)

* The building blocks of the model are defined here<br>
* If you are unfamiliar with MobileNet or depthwise convolutions,
* start with this great video I found on YouTube: [link](https://www.youtube.com/watch?v=T7o3xvJLuHk)
* Comment out layers if you want to decrease the depth.
* Change the number of filters if you want to change the width
* Please share if you find better hyperparameters for different datasets

In [None]:
import os
import warnings
import numpy as np
from tensorflow.keras.layers import Input, Activation, Conv2D, Dense, Dropout, BatchNormalization, ReLU, DepthwiseConv2D, GlobalAveragePooling2D, GlobalMaxPooling2D, Add
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers

# define the filter size
def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


# define the computation of each inverted residual block ('Res_Block')
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
    """Apply one expand -> depthwise -> project block to ``inputs``.

    Args:
        inputs: Feature-map tensor; its last axis is read as the channel count.
        expansion: Multiplier applied to the input channel count for the
            1x1 expansion convolution.
        stride: Stride of the 3x3 depthwise convolution.
        alpha: Width multiplier applied to ``filters``.
        filters: Nominal number of output channels before ``alpha`` scaling
            and rounding to a multiple of 8.
        block_id: Block index used to build layer names. A falsy value
            (block 0) skips the expansion stage and uses the
            ``expanded_conv_`` name prefix instead of ``block_<id>_``.

    Returns:
        The projected tensor, added to ``inputs`` when ``stride`` is 1 and
        the input and output channel counts match; otherwise the projected
        tensor alone.
    """
    if block_id:
        name_prefix = 'block_{}_'.format(block_id)
    else:
        name_prefix = 'expanded_conv_'

    input_channels = inputs.shape[-1]
    output_channels = _make_divisible(int(filters * alpha), 8)

    net = inputs

    # Expand: 1x1 convolution that widens the channel axis (skipped for block 0)
    if block_id:
        net = Conv2D(
            expansion * input_channels,
            kernel_size=1,
            strides=1,
            padding='same',
            use_bias=False,
            activation=None,
            kernel_initializer="he_normal",
            kernel_regularizer=regularizers.l2(4e-5),
            name=name_prefix + 'expand',
        )(net)
        net = BatchNormalization(
            epsilon=1e-3, momentum=0.999, name=name_prefix + 'expand_BN'
        )(net)
        net = ReLU(6., name=name_prefix + 'expand_relu')(net)

    # Depthwise: 3x3 per-channel convolution carrying the block's stride
    net = DepthwiseConv2D(
        kernel_size=3,
        strides=stride,
        activation=None,
        use_bias=False,
        padding='same',
        kernel_initializer="he_normal",
        depthwise_regularizer=regularizers.l2(4e-5),
        name=name_prefix + 'depthwise',
    )(net)
    net = BatchNormalization(
        epsilon=1e-3, momentum=0.999, name=name_prefix + 'depthwise_BN'
    )(net)
    net = ReLU(6., name=name_prefix + 'depthwise_relu')(net)

    # Project: 1x1 convolution down to the output width, with no activation after it
    net = Conv2D(
        output_channels,
        kernel_size=1,
        strides=1,
        padding='same',
        use_bias=False,
        activation=None,
        kernel_initializer="he_normal",
        kernel_regularizer=regularizers.l2(4e-5),
        name=name_prefix + 'project',
    )(net)
    net = BatchNormalization(
        epsilon=1e-3, momentum=0.999, name=name_prefix + 'project_BN'
    )(net)

    # The skip connection is only possible when input and output shapes agree.
    if input_channels == output_channels and stride == 1:
        net = Add(name=name_prefix + 'add')([inputs, net])
    return net

# build MobileNetV2 models
def MobileNetV2(input_shape=(28, 28, 1),
                alpha=1.0,
                depth_multiplier=1,
                include_top=True,
                pooling=None,
                classes=27):
    """Assemble the reduced MobileNetV2-style network used in this notebook.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        alpha: Width multiplier forwarded to every inverted residual block
            and used to size the first and (when > 1.0) last convolutions.
        depth_multiplier: Not used by this implementation.
        include_top: When true, finish with global average pooling and a
            softmax ``Dense`` layer of ``classes`` units.
        pooling: Only consulted when ``include_top`` is false: ``'avg'`` or
            ``'max'`` applies the matching global pooling; any other value
            leaves the feature map unpooled.
        classes: Number of units in the softmax layer.

    Returns:
        A Keras ``Model`` named ``'mobilenetv2_cifar10'``.
    """
    # One row per inverted residual block, in order:
    #   (filters, stride, expansion, block_id, dropout_after)
    # Commented rows are the optional deeper stages; (un)comment rows to
    # change the depth, edit `filters` to change the width.
    # Trailing "O/p" notes give the spatial size for a 28x28 input.
    block_specs = (
        (16,  1, 1, 0,  False),   # O/p 28x28

        (24,  1, 6, 1,  False),   # O/p 28x28
        (24,  1, 6, 2,  False),   # O/p 28x28

        (32,  2, 6, 3,  False),   # O/p 14x14
        (32,  1, 6, 4,  False),   # O/p 14x14
        (32,  1, 6, 5,  False),   # O/p 14x14

        (64,  2, 6, 6,  False),   # O/p 7x7
        (64,  1, 6, 7,  False),   # O/p 7x7
        (64,  1, 6, 8,  False),   # O/p 7x7
        (64,  1, 6, 9,  True),    # O/p 7x7

        # (96,  1, 6, 10, False),
        # (96,  1, 6, 11, False),
        # (96,  1, 6, 12, True),

        # (96,  1, 6, 13, False), # O/p 7x7
        # (96,  1, 6, 14, False), # O/p 7x7
        # (96,  1, 6, 15, True),  # O/p 7x7

        (128, 1, 6, 16, True),
    )

    # Stem: a single 3x3 convolution (no batch norm / activation after it)
    stem_filters = _make_divisible(32 * alpha, 8)
    net_input = Input(shape=input_shape)
    net = Conv2D(
        stem_filters,
        kernel_size=3,
        strides=1,
        padding='same',
        use_bias=False,
        kernel_initializer="he_normal",
        kernel_regularizer=regularizers.l2(4e-5),
        name='Conv1',
    )(net_input)

    # Body: stack of inverted residual blocks, with dropout where flagged
    for n_filters, block_stride, block_expansion, idx, dropout_after in block_specs:
        net = _inverted_res_block(
            net,
            filters=n_filters,
            alpha=alpha,
            stride=block_stride,
            expansion=block_expansion,
            block_id=idx,
        )
        if dropout_after:
            net = Dropout(rate=0.25)(net)

    # Head: the final 1x1 convolution is only widened for alpha > 1.0
    head_filters = _make_divisible(256 * alpha, 8) if alpha > 1.0 else 256
    net = Conv2D(
        head_filters,
        kernel_size=1,
        use_bias=False,
        kernel_initializer="he_normal",
        kernel_regularizer=regularizers.l2(4e-5),
        name='Conv_1',
    )(net)
    net = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(net)
    net = ReLU(6., name='out_relu')(net)

    # Top: classifier when requested, otherwise optional global pooling
    if include_top:
        net = GlobalAveragePooling2D(name='global_average_pool')(net)
        net = Dense(classes, activation='softmax', use_bias=True, name='Logits')(net)
    elif pooling == 'avg':
        net = GlobalAveragePooling2D()(net)
    elif pooling == 'max':
        net = GlobalMaxPooling2D()(net)

    return Model(inputs=net_input, outputs=net, name='mobilenetv2_cifar10')



# build model (the softmax width is tied to the one-hot label width)
model = MobileNetV2(input_shape=X_train.shape[1:], include_top=True, alpha=1.0, classes=nb_classes)
model.summary()
print('Model Name: ', model_name)

# save model architecture plot (.png); the target directory must exist first,
# and the checkpoint / CSV callbacks below write into the same directory
from tensorflow.keras.utils import plot_model
os.makedirs(log_dir, exist_ok=True)
plot_model(model, to_file=model_arch_path, show_shapes=True)


# set learning rate: piecewise-constant schedule, one entry per epoch.
# Each pair is (number of consecutive epochs, learning rate); the table
# covers 300 epochs in total.
lr_schedule = [
    (5,   2e-2),   # epochs   0-4
    (45,  1e-2),   # epochs   5-49
    (50,  8e-3),   # epochs  50-99
    (50,  4e-3),   # epochs 100-149
    (50,  2e-3),   # epochs 150-199
    (100, 1e-3),   # epochs 200-299
]
learning_rates = [rate for n_epochs, rate in lr_schedule for _ in range(n_epochs)]


def _lr_for_epoch(epoch):
    """Return the scheduled learning rate for a 0-based ``epoch``.

    Epochs past the end of the table keep the final learning rate instead
    of raising IndexError.
    """
    return float(learning_rates[min(epoch, len(learning_rates) - 1)])


# set callbacks
callbacks = []
#callbacks.append(TensorBoard(log_dir=log_dir, histogram_freq=1))
callbacks.append(ModelCheckpoint(model_cp_path, monitor='val_loss', save_best_only=True))
callbacks.append(LearningRateScheduler(_lr_for_epoch))
callbacks.append(CSVLogger(model_csv_path))

# One epoch = one full pass over the training set. The step count must be a
# whole number, so round up to include the final partial batch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

# compile & train the model (currently the default SGD optimizer; the
# LearningRateScheduler callback overrides its learning rate every epoch)
# NOTE(review): epochs is hard-coded to 30 here while nb_epoch is set to 5 in
# the setup cell - confirm which one is intended.
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
history = model.fit_generator(
              datagen.flow(X_train, y_train, batch_size=batch_size),
              steps_per_epoch=steps_per_epoch,
              epochs=30,
              verbose=1,
              callbacks=callbacks,
              validation_data=(X_test, y_test))

# validation: final loss / accuracy on the held-out test split
val_loss, val_acc = model.evaluate(X_test, y_test, verbose=0)
print('Model Name: ', model_name)
print('Test loss     : {:.5f}'.format(val_loss))
print('Test accuracy : {:.5f}'.format(val_acc))



Save the full model all at once

In [None]:
# Write the trained `model` to 'MobileNetV2.h5'; the path is relative, so the
# file lands in the current working directory.
model.save('MobileNetV2.h5')