# Chapter 4
## Pre-trained MobileNet
## Quantization

In [1]:
import tensorflow as tf
# Report the library versions this notebook was run with.
print('TensorFlow version:', tf.__version__)
print('Keras version: ', tf.keras.__version__)

TesnsorFlow version: 1.13.1
Keras version:  2.2.4-tf


In [2]:
# Load test images
from glob import glob
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

# glob() returns matches in arbitrary, filesystem-dependent order; sort them so
# that the image list (and anything indexing into it) is the same on every run.
images = sorted(glob('test_images/*.jpg'))

# View an example of an image
# example = mpimg.imread(images[0])
# plt.imshow(example)
# plt.show()
# print('Showing example image, image dimensions: ', example.shape)

In [3]:
# This will download a new version of MobileNet as needed
from tensorflow.keras.applications.mobilenet import MobileNet, decode_predictions

def mnet_predict(img, w_scale):
    """Classify a pre-processed image batch with a lazily created MobileNet.

    The network is stored in the module-level ``model_mnet`` variable so it is
    built only once and stays available to the caller afterwards (e.g. for
    saving). Set ``model_mnet = None`` to force a new model to be built.

    Args:
        img: Pre-processed input, passed unchanged to ``model_mnet.predict``.
        w_scale: Value passed to ``MobileNet`` as ``alpha``. It is only used
            when the model is created; an already cached model is reused
            regardless of this value.

    Returns:
        The first element of ``decode_predictions(predictions, top=3)``.
    """
    global model_mnet

    # Look the name up via globals() so that a not-yet-defined ``model_mnet``
    # is treated like ``None`` instead of raising NameError, and test against
    # None explicitly rather than relying on the model object's truthiness.
    if globals().get("model_mnet") is None:
        model_mnet = MobileNet(weights='imagenet', alpha=w_scale)

    # Perform inference on the pre-processed image
    predictions = model_mnet.predict(img)

    return decode_predictions(predictions, top=3)[0]

In [4]:
# Here, we'll load an image and pre-process it
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet import preprocess_input
import numpy as np

def mnet_process(w_scale):
    """Run MobileNet inference on every path in the global ``images`` list.

    Each file is loaded with ``target_size=(224, 224)``, converted to an
    array, given a leading batch axis, passed through ``preprocess_input`` and
    handed to ``mnet_predict``. Nothing is returned; the predictions are only
    available to the (commented-out) inspection code inside the loop.

    Args:
        w_scale: Forwarded unchanged to ``mnet_predict``.
    """
    for img_path in images:
        img = image.load_img(img_path, target_size=(224, 224))
        batch = np.expand_dims(image.img_to_array(img), axis=0)
        results = mnet_predict(preprocess_input(batch), w_scale)
        # The original image is no longer re-read from disk on every
        # iteration, since it was only needed for the optional display below.
        # Uncomment to inspect each image and its top-3 predictions:
        #plt.imshow(mpimg.imread(img_path))
        #plt.show()
        #print('Prediction Results: ', results[0][1:], results[1][1:], results[2][1:])

In [5]:
def to_quantized_tflite(network_name):
    """Convert a saved Keras model to a post-training-quantized TFLite file.

    Reads ``network_name + ".h5"`` and writes the converted model to
    ``network_name + "_quantized.tflite"``.

    Args:
        network_name: Base path of the model, without file extension.

    Returns:
        None. I/O errors while writing the output file are reported with
        ``print`` and not re-raised.
    """
    # For TensorFlow versions <= 1.12 lite lives in contrib; later versions
    # expose it directly from tf. Compare the numeric (major, minor) parts:
    # comparing the raw version strings is lexicographic and would, for
    # example, order "1.9.0" after "1.12.0".
    major_minor = tuple(int(part) for part in tf.__version__.split(".")[:2])
    if major_minor <= (1, 12):
        lite = tf.contrib.lite
    else:
        lite = tf.lite

    converter = lite.TFLiteConverter.from_keras_model_file(network_name + ".h5")
    converter.post_training_quantize = True

    tfmodel = converter.convert()

    try:
        # The context manager closes the file even if the write fails.
        with open(network_name + "_quantized" + ".tflite", "wb") as fd:
            fd.write(tfmodel)
    except OSError as err:
        print("Error in file i/o: ", err)

In [6]:
# Build, save and quantize MobileNet with alpha / network width = 1
network_name = "mnet_imagenet_1"  # base name for the .h5 and .tflite files written below
model_mnet = None  # reset the cached model so mnet_predict builds one for this alpha
w_scale = 1
# Runs inference on the test images; mnet_predict creates model_mnet as a side effect.
# NOTE: if `images` is empty no prediction runs and model_mnet is still None below.
mnet_process(w_scale)
model_mnet.save(network_name + ".h5")  # to_quantized_tflite reads this .h5 file
to_quantized_tflite(network_name)
print ("Mnet alpha 1 quantized...")

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.compat.v1.graph_util.convert_variables_to_constants
Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph
INFO:tensorflow:Froze 137 variables.
INFO:tensorflow:Converted 137 variables to const ops.
Mnet alpha 1 quantized...


In [9]:
%%bash
# Show the allocated size (in blocks) of the saved Keras model and the quantized TFLite file
ls -s *.tflite *.h5

16872 mnet_imagenet_1.h5
 4180 mnet_imagenet_1_quantized.tflite


### As expected, the model size is reduced by about 4× (float, 4 bytes → int8, 1 byte)