## Preparing the Model for Deployment
- Loading the model
- Tensorflow runtime for model evaluation
- Model pruning
- Model quantization
- Model compression
- Model conversion to tflite
- Model conversion to binary format .bin

In [31]:
import numpy as np
import tensorflow as tf
from keras.models import load_model
from keras.preprocessing import image
import tempfile
import keras
from tensorflow.keras.models import Model
import keras.backend as K

In [47]:
# Loading the keras model
model = load_model('model.h5')

#### Model Evaluation

In [33]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()


# Print the signatures from the converted model
interpreter = tf.lite.Interpreter(model_content=tflite_model)

signatures = interpreter.get_signature_list()
print(signatures)



INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmp5vm6ipzc\assets


INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmp5vm6ipzc\assets


{'serving_default': {'inputs': ['input_2'], 'outputs': ['dense_1']}}


### Model Optimization
#### Dynamic range quantization
Dynamic range quantization is a recommended starting point because it provides reduced memory usage and faster computation without you having to provide a representative dataset for calibration.<br> 
This type of quantization, statically quantizes only the weights from floating point to integer at conversion time, which provides 8-bits of precision:<br>

##### Model size: 3.2MB 87.2% smaller<br>

In [48]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# quantized_and_pruned_tflite_model = converter.convert()
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.target_spec.supported_types = [tf.float16]
quantized_and_pruned_tflite_model = converter.convert()

with open('quantized_and_pruned_tflite2.tflite', 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)



INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmp63bmpa50\assets


INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmp63bmpa50\assets


#### Full integer quantization
You can get further latency improvements, reductions in peak memory usage, and compatibility with integer only hardware devices or accelerators by making sure all model math is integer quantized.

For full integer quantization, you need to calibrate or estimate the range, i.e, (min, max) of all floating-point tensors in the model.<br> Unlike constant tensors such as weights and biases, variable tensors such as model input, activations (outputs of intermediate layers) and model output cannot be calibrated unless we run a few inference cycles.<br> As a result, the converter requires a representative dataset to calibrate them. This dataset can be a small subset (around ~100-500 samples) of the training or validation data

In [35]:
import os
from sklearn.model_selection import train_test_split
import pandas as pd
#set directories.
base_dir = os.path.join(os.getcwd(), 'dsa2022-Arusha/camera-trap/porini-machine-learning/data/porinicroppedimages/dataset/')
# base_dir = './data/porinicroppedimages/dataset/'


# Directory with our training images
train_dir = os.path.join(base_dir, 'train/')

print(train_dir)

#Directory with test images.
test_dir = os.path.join(base_dir, 'test/')
print(test_dir)

df_train = pd.read_csv(base_dir + 'train.csv')
print(df_train.shape)

df_test = pd.read_csv(base_dir + 'test.csv')

seed = 2022
train, val = train_test_split(df_train, test_size = 0.113, random_state = seed)

size = (128,128)
batch_size = 32

train_size = train.shape[0]
val_size = val.shape[0]

#to train on whole data per iteration
train_steps_per_epoch = int(train_size/batch_size) #int here used to round off
val_steps_per_epoch = int(val_size/batch_size) #int here used to round off

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(   
                # rotation_range = 30,
                # width_shift_range = 0.2,#
                # height_shift_range = 0.2,#
                # brightness_range = [0.5,1.5],#
                # horizontal_flip = True,
                # fill_mode = 'nearest'
)


val_datagen = tf.keras.preprocessing.image.ImageDataGenerator()


train_generator = train_datagen.flow_from_dataframe(
                    train,
                    directory = train_dir,
                    x_col = "filename",
                    y_col = "Species",
                    target_size = size,
                    class_mode = "categorical",
                    batch_size = batch_size,
                    shuffle = True,
                    seed = seed,
                    interpolation = "nearest",
                    #validate_filenames=False
)

val_generator = val_datagen.flow_from_dataframe(
                    val,
                    directory = train_dir, #valid is from train
                    x_col = "filename",
                    y_col = "Species",
                    target_size = size,
                    class_mode = "categorical",
                    batch_size = batch_size,
                    shuffle = True,
                    seed = seed,
                    interpolation = "nearest"
)

c:\Users\Austin\Desktop\Agent\dsail-tech4wildlife\dsa2022-Arusha/camera-trap/porini-machine-learning/data/porinicroppedimages/dataset/train/
c:\Users\Austin\Desktop\Agent\dsail-tech4wildlife\dsa2022-Arusha/camera-trap/porini-machine-learning/data/porinicroppedimages/dataset/test/
(2086, 9)
Found 1850 validated image filenames belonging to 6 classes.
Found 236 validated image filenames belonging to 6 classes.


In [36]:
image_tensors = []

for _ in range(len(val_generator)):
    batch_images = next(val_generator)
    image_tensors.extend(batch_images)

In [37]:
# using pytorch to get image 
next(val_generator)[0][0].shape

(128, 128, 3)

In [39]:
def representative_dataset():
    for data in tf.data.Dataset.from_tensor_slices(val_generator).batch(1).take(100):
        yield tf.cast(data, tf.float32)


In [45]:
import tensorflow as tf
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# converter.target_spec.supported_types = [tf.float16]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8
tflite_quant_model = converter.convert()



INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpn1fzmc3z\assets


INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpn1fzmc3z\assets


In [44]:
dataset = tf.keras.utils.image_dataset_from_directory(base_dir, batch_size=1, image_size=(128, 128))

def representative_dataset():
    for images, _ in dataset:
        yield [images]



Found 2322 files belonging to 2 classes.


In [46]:
with open('tflite_quant_model2.tflite', 'wb') as f:
  f.write(tflite_quant_model)


#### Creating a representative dataset

In [73]:
import numpy as np
import tensorflow as tf

# Load the dataset
dataset = tf.keras.utils.image_dataset_from_directory(
    base_dir,
    batch_size=1,
    image_size=(128, 128)
)

# Create an empty list to store the representative features
representative_features = []

# Iterate over the dataset and extract the images
for images, _ in dataset:
    # Append the images to the list
    representative_features.append(images.numpy())

# Convert the list to a NumPy array
representative_features = np.concatenate(representative_features, axis=0)

# Save the representative features as an .npy file
np.save('representative_features.npy', representative_features)

print("Representative features saved as representative_features.npy")


Found 2322 files belonging to 2 classes.
Representative features saved as representative_features.npy


In [25]:
dataset = tf.keras.utils.image_dataset_from_directory(base_dir, batch_size=1, image_size=(128, 128))

for images, labels in dataset:
    # Accessing the shape of the batched images
    batch_shape = images.shape
    print("Batch shape:", batch_shape)
    break  # Only printing the shape of the first batch


Found 2322 files belonging to 2 classes.
Batch shape: (1, 128, 128, 3)


In [74]:
representative_features

array([[[[128.70117  , 108.70117  , 109.70117  ],
         [125.29883  , 105.29883  , 106.29883  ],
         [124.00781  , 104.00781  , 105.00781  ],
         ...,
         [180.5039   , 154.5039   , 155.5039   ],
         [182.90234  , 156.90234  , 157.90234  ],
         [182.09961  , 158.09961  , 158.09961  ]],

        [[128.09961  , 108.09961  , 109.09961  ],
         [128.90234  , 108.90234  , 109.90234  ],
         [128.99414  , 108.99414  , 109.99414  ],
         ...,
         [176.50195  , 150.50195  , 151.50195  ],
         [178.90234  , 152.90234  , 153.90234  ],
         [178.90039  , 154.90039  , 154.90039  ]],

        [[128.19922  , 108.19922  , 109.19922  ],
         [130.       , 110.       , 111.       ],
         [130.4961   , 110.49609  , 111.49609  ],
         ...,
         [176.99805  , 150.99805  , 151.99805  ],
         [176.59766  , 150.59766  , 151.59766  ],
         [174.90039  , 150.90039  , 150.90039  ]],

        ...,

        [[109.00195  ,  89.00195  ,  7

In [13]:
def representative_dataset():
      for data in dataset:
            yield {
            "image": data.image,
            "bias": data.bias,
            }

In [55]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.utils import get_file
import tempfile
import zipfile

# Load the original Keras model
original_model = load_model('model.h5')

# Define a smaller model architecture
small_model = tf.keras.Sequential([
    layers.InputLayer(input_shape=(128, 128, 3)),  # Specify the input shape of your model
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=shape)
    # Convolutional layers
    # layers.Conv2D(32, (3, 3), activation='relu'),
    # layers.MaxPooling2D((2, 2)),
    # layers.Conv2D(64, (3, 3), activation='relu'),
    # layers.MaxPooling2D((2, 2)),
    
    # Flatten layer
    layers.Flatten(),
    
    # Dense layers
    layers.Dense(64, activation='relu'),
    layers.Dense(6, activation='softmax')  # Assuming 10 classes for classification
    
    # Add more layers as needed
])

# Transfer the weights from the original model to the smaller model
for layer_target, layer_source in zip(small_model.layers, original_model.layers):
    layer_target.set_weights(layer_source.get_weights())

# Quantize the weights and activations
converter = tf.lite.TFLiteConverter.from_keras_model(small_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.target_spec.supported_types = [tf.float16]
tflite_quant_model = converter.convert()

# Save the quantized model to a file
tflite_model_path = 'quantized_model.tflite'
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_quant_model)

# Compress the model file
compressed_model_path = 'compressed_model.zip'
with zipfile.ZipFile(compressed_model_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(tflite_model_path, arcname='model.tflite')

# Check the compressed model size
compressed_model_size = os.path.getsize(compressed_model_path)
print(f"Compressed model size: {compressed_model_size / (1024 * 1024):.2f} MB")


ValueError: You called `set_weights(weights)` on layer "flatten_3" with a weight list of length 2, but the layer was expecting 0 weights. Provided weights: [array([[[[ 0.00130486, -0.03660612,  0.10142724, ...

In [60]:
import tensorflow as tf
from tensorflow.keras import models, layers
import zipfile
import os

# Define the input shape of the image
input_shape = (128, 128, 3)

# Define the bigger model
bigger_model = models.Sequential()
bigger_model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
bigger_model.add(layers.MaxPooling2D((2, 2)))
bigger_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
bigger_model.add(layers.MaxPooling2D((2, 2)))
bigger_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
bigger_model.add(layers.Flatten())
bigger_model.add(layers.Dense(64, activation='relu'))
bigger_model.add(layers.Dense(6, activation='softmax'))

# Load the weights into the bigger model
bigger_model.load_weights('model.h5')

# Define a smaller model architecture
smaller_model = models.Sequential()
smaller_model.add(layers.InputLayer(input_shape=input_shape))
smaller_model.add(layers.Conv2D(16, (3, 3), activation='relu'))
smaller_model.add(layers.MaxPooling2D((2, 2)))
smaller_model.add(layers.Conv2D(16, (3, 3), activation='relu'))
smaller_model.add(layers.MaxPooling2D((2, 2)))
smaller_model.add(layers.Flatten())
smaller_model.add(layers.Dense(64, activation='relu'))
smaller_model.add(layers.Dense(6, activation='softmax'))

# Transfer the weights from the bigger model to the smaller model
for layer_target, layer_source in zip(smaller_model.layers[:-1], bigger_model.layers[:-1]):
    if isinstance(layer_target, tf.keras.layers.Conv2D):
        # Adjust the number of filters in Conv2D layers
        layer_target.set_weights([layer_source.get_weights()[0][:, :, :16, :], layer_source.get_weights()[1]])
    else:
        layer_target.set_weights(layer_source.get_weights())

# Quantize the weights and activations
converter = tf.lite.TFLiteConverter.from_keras_model(smaller_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.target_spec.supported_types = [tf.float16]
tflite_quant_model = converter.convert()

# Save the quantized model to a file
tflite_model_path = 'quantized_model.tflite'
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_quant_model)

# Compress the model file
compressed_model_path = 'compressed_model.zip'
with zipfile.ZipFile(compressed_model_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(tflite_model_path, arcname='model.tflite')

# Check the compressed model size
compressed_model_size = os.path.getsize(compressed_model_path)
print(f"Compressed model size: {compressed_model_size / (1024 * 1024):.2f} MB")


ValueError: Layer conv2d_29 weight shape (3, 3, 3, 16) is not compatible with provided weight shape (3, 3, 3, 32).

In [62]:
import tf2onnx

In [67]:
# Convert the Keras model to ONNX
onnx_model, _ = tf2onnx.convert.from_keras(model)

# Save the ONNX model to a file
onnx_model_path = 'converted_model.onnx'
tf2onnx.save.save_model(onnx_model, onnx_model_path)

print("Keras model converted to ONNX successfully.")


AttributeError: module 'tf2onnx' has no attribute 'save'

In [71]:
import onnx
from onnxruntime.quantization import quantize_dynamic

# Load the ONNX model
# onnx_model = onnx.load('path_to_onnx_model.onnx')

# Quantize the model
# quantized_model = quantize_dynamic(onnx_model)

# Save the quantized model
# quantized_model_path = 'quantized_model.onnx'
onnx.save_model(onnx_model, onnx_model_path)

print("ONNX model quantized and saved successfully.")


ONNX model quantized and saved successfully.
