In [9]:
import tensorflow as tf
import numpy as np
from scipy.ndimage import zoom
import torch

# Load TensorFlow MNIST data
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize and reshape
train_images = train_images / 255.0
test_images = test_images / 255.0
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1)
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)

In [8]:
folder = "../model/"
#model_tf = tf.keras.models.load_model(folder + 'model_tf')

model_tf = tf.keras.models.load_model(folder + 'model_tf.h5')



In [13]:
# Resizing function
def resize_images(images):
    return np.array([zoom(image, 0.5) for image in images])

# Resize
x_train = resize_images(train_images)
x_test = resize_images(test_images)

# Then reshape
x_train = x_train.reshape(60000, 14*14)
x_test = x_test.reshape(10000, 14*14)
x_train = train_images.astype('float32')
x_test = test_images.astype('float32')

# normalize to range [0, 1]
x_train /= 255
x_test /= 255

ValueError: cannot reshape array of size 0 into shape (60000,196)

In [10]:
import tensorflow_model_optimization as tfmot

# Apply quantization to the layers
quantize_model = tfmot.quantization.keras.quantize_model

q_aware_model = quantize_model(model_tf)

# 'quantize_model' requires a recompile
q_aware_model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

q_aware_model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer (QuantizeLa  (None, 28, 28, 1)         3         
 yer)                                                            
                                                                 
 quant_conv2d_4 (QuantizeWr  (None, 24, 24, 6)         171       
 apperV2)                                                        
                                                                 
 quant_average_pooling2d_4   (None, 12, 12, 6)         3         
 (QuantizeWrapperV2)                                             
                                                                 
 quant_conv2d_5 (QuantizeWr  (None, 8, 8, 16)          2451      
 apperV2)                                                        
                                                                 
 quant_average_pooling2d_5   (None, 4, 4, 16)         

## Converting to TFLite Format


In [17]:
import tensorflow as tf

# Create a converter
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)

# Indicate that you want to perform default optimizations,
# which include quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Define a generator function that provides your test data's numpy arrays
def representative_data_gen():
  for i in range(500):
    yield [np.array(train_images[i:i+1], dtype=np.float32)]

# Use the generator function to guide the quantization process
converter.representative_dataset = representative_data_gen

# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# Set the input and output tensors to int8
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

# Convert the model
tflite_model = converter.convert()

# Save the model to disk
open("../model/q_aware_model.tflite", "wb").write(tflite_model)

INFO:tensorflow:Assets written to: /var/folders/k7/qnxvvp9d0gv2p52w6qt29gsr0000gn/T/tmpss__xolb/assets


INFO:tensorflow:Assets written to: /var/folders/k7/qnxvvp9d0gv2p52w6qt29gsr0000gn/T/tmpss__xolb/assets
2024-01-25 11:46:10.024932: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-01-25 11:46:10.024944: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-01-25 11:46:10.025062: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/k7/qnxvvp9d0gv2p52w6qt29gsr0000gn/T/tmpss__xolb
2024-01-25 11:46:10.027060: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-01-25 11:46:10.027066: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/k7/qnxvvp9d0gv2p52w6qt29gsr0000gn/T/tmpss__xolb
2024-01-25 11:46:10.033016: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-01-25 11:46:10.081220: I tensorflow/cc/saved_model/loader.cc:217] Running initialization

51600

### Testing the Quantized Model

In [18]:
# Load the TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_path="../model/q_aware_model.tflite")
interpreter.allocate_tensors()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [19]:
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
