In [16]:
# Imports
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing.image import load_img, img_to_array

print("Libraries imported.")

# Load pretrained VGG16 model
model = VGG16(weights='imagenet')
print("Loaded VGG16 model.")

# Save weights to npy file (simulate your assignment step)
weights = {}
for layer in model.layers:
    if layer.get_weights():
        weights[layer.name] = layer.get_weights()
np.save('vgg16_weights.npy', weights)
print("Weights saved to vgg16_weights.npy")

# Reload weights into new model instance
loaded_weights = np.load('vgg16_weights.npy', allow_pickle=True).item()
model_reloaded = VGG16(weights=None)
for layer in model_reloaded.layers:
    if layer.name in loaded_weights:
        layer.set_weights(loaded_weights[layer.name])
print("Model reloaded from saved weights.")

# Convert to FP16 quantized TFLite model
converter_fp16 = tf.lite.TFLiteConverter.from_keras_model(model_reloaded)
converter_fp16.optimizations = [tf.lite.Optimize.DEFAULT]
converter_fp16.target_spec.supported_types = [tf.float16]
tflite_model_fp16 = converter_fp16.convert()
with open('vgg16_fp16.tflite', 'wb') as f:
    f.write(tflite_model_fp16)
print("FP16 quantized TFLite model saved as vgg16_fp16.tflite")

# Representative dataset generator (Improved for better INT8 accuracy)
def representative_data_gen():
    # Simulate real images using random noise here for example
    # Replace this with loading real images for best accuracy, e.g. from a dataset folder
    for _ in range(100):
        img = np.random.rand(1, 224, 224, 3).astype(np.float32)
        yield [img]

# Convert to INT8 quantized TFLite model
converter_int8 = tf.lite.TFLiteConverter.from_keras_model(model_reloaded)
converter_int8.optimizations = [tf.lite.Optimize.DEFAULT]
converter_int8.representative_dataset = representative_data_gen
converter_int8.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter_int8.inference_input_type = tf.uint8
converter_int8.inference_output_type = tf.uint8
tflite_model_int8 = converter_int8.convert()
with open('vgg16_int8.tflite', 'wb') as f:
    f.write(tflite_model_int8)
print("INT8 quantized TFLite model saved as vgg16_int8.tflite")

#  Download and preprocess a sample image for testing inference
img_path = tf.keras.utils.get_file('cat.jpg',
    'https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg')
img = load_img(img_path, target_size=(224, 224))
input_data = preprocess_input(np.expand_dims(img_to_array(img), axis=0))

# Original model prediction
pred = model.predict(input_data)
print("Original model prediction:", decode_predictions(pred, top=3)[0])

# FP16 quantized model inference
interpreter_fp16 = tf.lite.Interpreter(model_path="vgg16_fp16.tflite")
interpreter_fp16.allocate_tensors()
input_details_fp16 = interpreter_fp16.get_input_details()
output_details_fp16 = interpreter_fp16.get_output_details()
interpreter_fp16.set_tensor(input_details_fp16[0]['index'], input_data.astype(np.float32))
interpreter_fp16.invoke()
output_fp16 = interpreter_fp16.get_tensor(output_details_fp16[0]['index'])
print("FP16 quantized model prediction:", decode_predictions(output_fp16, top=3)[0])

# INT8 quantized model inference
interpreter_int8 = tf.lite.Interpreter(model_path="vgg16_int8.tflite")
interpreter_int8.allocate_tensors()
input_details_int8 = interpreter_int8.get_input_details()
output_details_int8 = interpreter_int8.get_output_details()
scale, zero_point = input_details_int8[0]['quantization']
int8_input = (input_data / scale + zero_point).astype(np.uint8)
interpreter_int8.set_tensor(input_details_int8[0]['index'], int8_input)
interpreter_int8.invoke()
int8_output = interpreter_int8.get_tensor(output_details_int8[0]['index'])
out_scale, out_zero = output_details_int8[0]['quantization']
float_output = (int8_output.astype(np.float32) - out_zero) * out_scale
print("INT8 quantized model prediction:", decode_predictions(float_output, top=3)[0])


Libraries imported.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step
Loaded VGG16 model.
Weights saved to vgg16_weights.npy
Model reloaded from saved weights.
Saved artifact at '/tmp/tmphc_rq0qb'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_138')
Output Type:
  TensorSpec(shape=(None, 1000), dtype=tf.float32, name=None)
Captures:
  139000012905296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012909520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012910480: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012908560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012909712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012910096: TensorSpe

W0000 00:00:1748292191.604473      35 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1748292191.604515      35 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.


FP16 quantized TFLite model saved as vgg16_fp16.tflite
Saved artifact at '/tmp/tmpa9tmxh60'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_138')
Output Type:
  TensorSpec(shape=(None, 1000), dtype=tf.float32, name=None)
Captures:
  139000012905296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012909520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012910480: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012908560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012909712: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012910096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012908752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012902608: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012907024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139000012905680: Tenso

W0000 00:00:1748292237.131589      35 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1748292237.131626      35 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: UINT8


INT8 quantized TFLite model saved as vgg16_int8.tflite
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 594ms/step
Original model prediction: [('n02127052', 'lynx', 0.5816908), ('n02114855', 'coyote', 0.36767617), ('n02114367', 'timber_wolf', 0.009691837)]
FP16 quantized model prediction: [('n02127052', 'lynx', 0.5817597), ('n02114855', 'coyote', 0.3675262), ('n02114367', 'timber_wolf', 0.009713426)]
INT8 quantized model prediction: [('n15075141', 'toilet_tissue', 0.03125), ('n03788365', 'mosquito_net', 0.03125), ('n04209239', 'shower_curtain', 0.03125)]
