In [1]:
from tensorflow import keras
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt

from keras.layers import Conv2D, MaxPool2D, Flatten, Dense

from keras.preprocessing.image import ImageDataGenerator
from keras.utils import load_img
from keras.utils import img_to_array
from keras.applications import imagenet_utils
import os

import pandas as pd
import tensorflow_datasets as tfds
import tensorflow_model_optimization as tfmot

In [2]:
# Pretrained ImageNet MobileNet with width multiplier alpha=0.25 and a
# 224x224 RGB input.
model = tf.keras.applications.MobileNet(
    weights='imagenet',
    input_shape=(224, 224, 3),
    alpha=0.25,
)

Metal device set to: Apple M2 Pro


2023-03-12 15:48:13.850205: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-03-12 15:48:13.850225: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Persist the pretrained float model to "mobilenet.h5" (path is relative to the
# current working directory).
model.save("mobilenet.h5")



In [4]:
# Print the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

Model: "mobilenet_0.25_224"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1 (Conv2D)              (None, 112, 112, 8)       216       
                                                                 
 conv1_bn (BatchNormalizatio  (None, 112, 112, 8)      32        
 n)                                                              
                                                                 
 conv1_relu (ReLU)           (None, 112, 112, 8)       0         
                                                                 
 conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 8)      72        
                                                                 
 conv_dw_1_bn (BatchNormaliz  (None, 112, 112, 8)      32        
 ation)                                         

In [5]:
def representative_dataset(dataset):
    """Build a sample-generator factory over ``dataset``.

    Args:
        dataset: Object exposing ``.batch(n)`` whose batched elements are
            mappings with an ``'image'`` entry.

    Returns:
        A zero-argument callable. Each call starts a fresh generator that
        walks ``dataset`` in batches of one and yields ``[image_batch]``,
        i.e. a single-element list per sample.
    """
    def _data_gen():
        # The dataset is only touched once iteration actually starts.
        yield from ([sample['image']] for sample in dataset.batch(1))

    return _data_gen

In [6]:
def eval_tflite(tflite_model, dataset):
    """Evaluate a TensorFlow Lite classification model on a labelled dataset.

    The dataset is traversed exactly once, one example at a time. Each element
    must be a mapping with an ``'image'`` entry (fed to the interpreter's first
    input) and a ``'label'`` entry (the integer class id). The predicted class
    is the arg-max of the interpreter's first output.

    Args:
        tflite_model: Serialized TFLite model, as accepted by
            ``tf.lite.Interpreter(model_content=...)``.
        dataset: Unbatched dataset exposing ``.batch(n)`` whose elements are
            dicts with ``'image'`` and ``'label'`` keys.

    Returns:
        Top-1 accuracy as a float in ``[0, 1]``. The accuracy is also printed
        as a percentage.

    Raises:
        ValueError: If ``dataset`` yields no examples.
    """
    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_idx = interpreter.get_input_details()[0]['index']
    output_idx = interpreter.get_output_details()[0]['index']

    predictions = []
    gt_labels = []

    # Predictions and labels are gathered in the same pass so they can never
    # get out of step (a second traversal of a non-deterministically ordered
    # dataset could pair outputs with the wrong labels) and the input pipeline
    # only runs once.
    for data in dataset.batch(1):
        interpreter.set_tensor(input_idx, data['image'])
        interpreter.invoke()
        scores = interpreter.get_tensor(output_idx).flatten()
        # Only the winning class is kept; the full score vector is not needed.
        predictions.append(int(np.argmax(scores)))
        gt_labels.append(int(np.asarray(data['label']).reshape(-1)[0]))

    if not gt_labels:
        raise ValueError('eval_tflite: dataset yielded no examples')

    accuracy = float(np.mean(np.array(predictions) == np.array(gt_labels)))
    print(f'Top-1 accuracy (quantized): {accuracy * 100:.2f}%')
    return accuracy

In [7]:
# Attach an optimizer, loss and accuracy metric to the float model so it can
# be scored with `model.evaluate`.
model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [8]:
import imagenet_mini

# Training split: apply the imagenet_mini preprocessing configured with
# image_size=(224, 224). `tr_ds` keeps dict elements ('image', 'label');
# `train_ds` is the (image, label) view in batches of 256.
tr_ds = imagenet_mini.get_imagenet_mini_dataset("train").map(
    imagenet_mini.get_preprocess_image_fn(image_size=(224, 224))
)
train_ds = tr_ds.map(lambda data: (data['image'], data['label'])).batch(256)

# Validation split, built the same way: `ds` keeps dict elements, `test_ds`
# is the batched (image, label) view.
ds = imagenet_mini.get_imagenet_mini_dataset("val").map(
    imagenet_mini.get_preprocess_image_fn(image_size=(224, 224))
)
test_ds = ds.map(lambda data: (data['image'], data['label'])).batch(256)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [9]:
# Baseline: score the float model on the batched validation split.
# `evaluate` returns the loss followed by the compiled 'accuracy' metric.
loss, acc = model.evaluate(test_ds)
print(f'Top-1 accuracy (float): {acc * 100:.2f}%')

2023-03-12 15:48:14.839654: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-03-12 15:48:15.108252: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Top-1 accuracy (float): 36.66%


In [20]:
from tf_quantization.quantize_model import quantize_model

# "q_aware" stands for quantization-aware.
# The same 8-bit weight / 8-bit activation config object is repeated for the
# first 36 entries; the final entry is spelled out as its own dict.
quant_layer_conf = {"weight_bits": 8, "activation_bits": 8}
q_aware_model = quantize_model(
    model,
    [quant_layer_conf] * 36 + [{"weight_bits": 8, "activation_bits": 8}],
)

q_aware_model.summary()

Layers map:
[{'conv_dw_5', 'conv_dw_5_relu', 'conv_dw_5_bn'}, {'conv_dw_13_relu', 'conv_dw_13_bn', 'conv_dw_13'}, {'conv_dw_11_bn', 'conv_dw_11_relu', 'conv_dw_11'}, {'conv_pw_13_bn', 'conv_pw_13', 'conv_pw_13_relu'}, {'conv_pad_2'}, {'conv_pw_5', 'conv_pw_5_relu', 'conv_pw_5_bn'}, {'conv_pw_12_relu', 'conv_pw_12', 'conv_pw_12_bn'}, {'conv_dw_10_bn', 'conv_dw_10_relu', 'conv_dw_10'}, {'conv_pw_6_bn', 'conv_pw_6_relu', 'conv_pw_6'}, {'conv_pw_7_relu', 'conv_pw_7', 'conv_pw_7_bn'}, {'conv_dw_2_relu', 'conv_dw_2_bn', 'conv_dw_2'}, {'conv_pw_1', 'conv_pw_1_relu', 'conv_pw_1_bn'}, {'conv_dw_7', 'conv_dw_7_bn', 'conv_dw_7_relu'}, {'conv_dw_3_relu', 'conv_dw_3', 'conv_dw_3_bn'}, {'conv1_relu', 'conv1', 'conv1_bn'}, {'conv_pw_9', 'conv_pw_9_relu', 'conv_pw_9_bn'}, {'conv_pw_8_bn', 'conv_pw_8', 'conv_pw_8_relu'}, {'conv_pw_3_bn', 'conv_pw_3', 'conv_pw_3_relu'}, {'conv_preds'}, {'predictions'}, {'dropout'}, {'global_average_pooling2d'}, {'conv_dw_12', 'conv_dw_12_bn', 'conv_dw_12_relu'}, {'conv_

In [19]:
import calculate_model_size

# Total weights size of the quantization-aware model as computed by the
# project helper, reported after dividing by 2**20.
size = calculate_model_size.calculate_weights_mobilenet_size(q_aware_model)
print("Weights Size: %.3f Mb" % (size / 2**20,))

Layer input_1: 0
Layer quantize_layer_2: 0
Layer conv1_bnfolded: 2240
Layer quant_conv1_relu: 0
Layer conv_dw_1_bnfolded: 1088
Layer quant_conv_dw_1_relu: 0
Layer conv_pw_1_bnfolded: 2048
Layer quant_conv_pw_1_relu: 0
Layer quant_conv_pad_2: 0
Layer conv_dw_2_bnfolded: 2176
Layer quant_conv_dw_2_relu: 0
Layer conv_pw_2_bnfolded: 6144
Layer quant_conv_pw_2_relu: 0
Layer conv_dw_3_bnfolded: 4352
Layer quant_conv_dw_3_relu: 0
Layer conv_pw_3_bnfolded: 10240
Layer quant_conv_pw_3_relu: 0
Layer quant_conv_pad_4: 0
Layer conv_dw_4_bnfolded: 4352
Layer quant_conv_dw_4_relu: 0
Layer conv_pw_4_bnfolded: 20480
Layer quant_conv_pw_4_relu: 0
Layer conv_dw_5_bnfolded: 8704
Layer quant_conv_dw_5_relu: 0
Layer conv_pw_5_bnfolded: 36864
Layer quant_conv_pw_5_relu: 0
Layer quant_conv_pad_6: 0
Layer conv_dw_6_bnfolded: 8704
Layer quant_conv_dw_6_relu: 0
Layer conv_pw_6_bnfolded: 73728
Layer quant_conv_pw_6_relu: 0
Layer conv_dw_7_bnfolded: 17408
Layer quant_conv_dw_7_relu: 0
Layer conv_pw_7_bnfolded: 13

In [10]:
# Compile with a zero learning rate so the fit below leaves the weights
# unchanged and only lets the model learn its activation quantization ranges.
q_aware_model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# A single training batch, for one epoch, is used for this range update.
q_aware_model.fit(train_ds.take(1), epochs=1)

2023-03-10 15:24:29.464473: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Tensor("mobilenet_1.00_224/quant_conv1_relu/Relu6:0", shape=(None, 112, 112, 32), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_1_relu/Relu6:0", shape=(None, 112, 112, 32), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_1_relu/Relu6:0", shape=(None, 112, 112, 64), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_2_relu/Relu6:0", shape=(None, 56, 56, 64), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_2_relu/Relu6:0", shape=(None, 56, 56, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_3_relu/Relu6:0", shape=(None, 56, 56, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_3_relu/Relu6:0", shape=(None, 56, 56, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_4_relu/Relu6:0", shape=(None, 28, 28, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_4_relu/Relu6:0", shape=(None, 28, 28, 256), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_5_relu/Relu6:0", shape=(None, 28, 28, 256), dtype=float32)
Te

2023-03-10 15:24:33.180580: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


 29/586 [>.............................] - ETA: 1:22:43 - loss: 6.5302 - accuracy: 0.0432

KeyboardInterrupt: 

In [14]:
# Score the quantization-aware model on the batched validation split after its
# quantization ranges were updated.
# NOTE: q_aware_model must already be compiled; otherwise `evaluate` raises
# RuntimeError ("You must compile your model before training/testing").
qa_loss, qa_acc = q_aware_model.evaluate(test_ds)
print(f'Top-1 accuracy (quantized, quant ranges updated): {qa_acc * 100:.2f}%')

RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.

In [None]:

# Quantization-aware fine-tuning: recompile with a non-zero learning rate and
# train for 5 epochs on the training batches, using test_ds as validation data.
q_aware_model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

q_aware_model.fit(
    train_ds,
    epochs=5,
    validation_data=test_ds,
)

In [15]:
# Score the quantization-aware Keras model (before TFLite conversion) on the
# batched validation split.
qa_loss, qa_acc = q_aware_model.evaluate(test_ds)
print(f'Top-1 accuracy (quantize aware float): {qa_acc * 100:.2f}%')

Top-1 accuracy (quantize aware float): 52.20%


In [16]:
# Persist the quantization-aware model to "q_aware_model.h5" (path is relative
# to the current working directory).
q_aware_model.save("q_aware_model.h5")

In [17]:
# Convert the quantization-aware Keras model to a serialized TFLite model with
# the default optimization set, then measure its top-1 accuracy on the
# unbatched validation dataset `ds`.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_model = converter.convert()
eval_tflite(quantized_model, ds)

Tensor("mobilenet_1.00_224/quant_conv1_relu/Relu6:0", shape=(None, 112, 112, 32), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_1_relu/Relu6:0", shape=(None, 112, 112, 32), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_1_relu/Relu6:0", shape=(None, 112, 112, 64), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_2_relu/Relu6:0", shape=(None, 56, 56, 64), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_2_relu/Relu6:0", shape=(None, 56, 56, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_3_relu/Relu6:0", shape=(None, 56, 56, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_3_relu/Relu6:0", shape=(None, 56, 56, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_4_relu/Relu6:0", shape=(None, 28, 28, 128), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_pw_4_relu/Relu6:0", shape=(None, 28, 28, 256), dtype=float32)
Tensor("mobilenet_1.00_224/quant_conv_dw_5_relu/Relu6:0", shape=(None, 28, 28, 256), dtype=float32)
Te



INFO:tensorflow:Assets written to: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpm8y8uzbd/assets


INFO:tensorflow:Assets written to: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpm8y8uzbd/assets
2023-03-10 12:00:38.721800: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-03-10 12:00:38.721812: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-03-10 12:00:38.722134: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpm8y8uzbd
2023-03-10 12:00:38.739570: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-03-10 12:00:38.739579: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpm8y8uzbd
2023-03-10 12:00:38.793277: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-03-10 12:00:38.801799: I tensorflow/cc/saved_model/load

Evaluated 100
Evaluated 200
Evaluated 300
Evaluated 400
Evaluated 500
Evaluated 600
Evaluated 700
Evaluated 800
Evaluated 900
Evaluated 1000
Top-1 accuracy (quantized): 2.00%


In [18]:
# Write the converted quantization-aware TFLite model to the current working
# directory.
with open('model.tflite', mode='wb') as tflite_file:
    tflite_file.write(quantized_model)

In [31]:
# Post-training quantization of the original float model, for comparison with
# the quantization-aware path. The representative dataset is drawn from the
# first 1000 preprocessed training examples.
converter2 = tf.lite.TFLiteConverter.from_keras_model(model)
converter2.optimizations = [tf.lite.Optimize.DEFAULT]
converter2.representative_dataset = representative_dataset(tr_ds.take(1000))
# The full-integer settings below are left disabled, so the converter keeps
# its default supported ops and input/output types.
#converter2.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
#converter2.inference_input_type = tf.int8  # or tf.uint8
#converter2.inference_output_type = tf.int8  # or tf.uint8
original_quantized_model = converter2.convert()



INFO:tensorflow:Assets written to: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpdvttb3h2/assets


INFO:tensorflow:Assets written to: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpdvttb3h2/assets
2023-03-09 21:22:27.891470: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2023-03-09 21:22:27.891483: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2023-03-09 21:22:27.891568: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpdvttb3h2
2023-03-09 21:22:27.898783: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-03-09 21:22:27.898794: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /var/folders/zf/hqnjy5w17j5fsx1sf1rz_70r0000gn/T/tmpdvttb3h2
2023-03-09 21:22:27.922928: I tensorflow/cc/saved_model/loader.cc:229] Restoring SavedModel bundle.
2023-03-09 21:22:28.018585: I tensorflow/cc/saved_model/loader.cc:213] Running initialization

In [25]:
# Write the post-training-quantized TFLite model to the current working
# directory.
with open('original_qmodel.tflite', mode='wb') as tflite_file:
    tflite_file.write(original_quantized_model)

In [32]:
# Top-1 accuracy of the post-training-quantized model on the unbatched
# validation dataset `ds`.
eval_tflite(original_quantized_model, ds)

Evaluated 100
Evaluated 200
Evaluated 300
Evaluated 400
Evaluated 500
Evaluated 600
Evaluated 700
Evaluated 800
Evaluated 900
Evaluated 1000
Top-1 accuracy (quantized): 53.40%
