This notebook implements the deployment of a CNN gesture recognition model to an edge device (e.g. Sony Spresense).

Step Overview:
1. Convert the PyTorch model to an ONNX model
2. Convert the ONNX model to a Keras model
3. Convert the Keras model to a quantization-aware model
4. Retrain the quantization-aware model
5. Convert it to a TensorFlow Lite model
6. Use Edge Impulse to generate code for Sony Spresense

# Convert PyTorch Model to ONNX Model

In [2]:
import torch
import torchvision
from models.mobilenetv1 import MobilenetV1
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Example input used to trace the model; it fixes the exported input shape
# to (1, 1, 8, 24).
dummy_input = torch.randn(1, 1, 8, 24, device=device)

model = MobilenetV1(ch_in=1, n_classes=8).to(device)
# map_location remaps the checkpoint tensors onto `device`, so a checkpoint
# saved from a CUDA session still loads on a CPU-only machine.
model.load_state_dict(torch.load(
    "pretrain_model/MobilenetV1_Param@29.29 k _MAC@233.1 KMac_Acc@95.346.pt",
    map_location=device))
# Put the module in evaluation mode before tracing it for export.
model.eval()

# verbose=True prints the exported graph to the cell output.
torch.onnx.export(model, dummy_input,
                  "pretrain_model/onnx_model/MobilenetV1.onnx", verbose=True)


Exported graph: graph(%input.1 : Float(1, 1, 8, 24, strides=[192, 192, 24, 1], requires_grad=0, device=cpu),
      %fc.weight : Float(8, 128, strides=[128, 1], requires_grad=1, device=cpu),
      %fc.bias : Float(8, strides=[1], requires_grad=1, device=cpu),
      %onnx::Conv_125 : Float(32, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_126 : Float(32, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_128 : Float(32, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_129 : Float(32, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_131 : Float(32, 32, 1, 1, strides=[32, 1, 1, 1], requires_grad=0, device=cpu),
      %onnx::Conv_132 : Float(32, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_134 : Float(32, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cpu),
      %onnx::Conv_135 : Float(32, strides=[1], requires_grad=0, device=cpu),
      %onnx::Conv_137 : Float(64, 32, 1, 1, strides

# Convert ONNX Model to Keras Model

In [None]:
# Install library
%cd onnx2keras
!pip install -e .
%cd ..

In [3]:
import tensorflow_model_optimization as tfmot
import tensorflow as tf
import onnx

from onnx2keras import onnx_to_keras

# Read the ONNX graph written by the export step.
onnx_model = onnx.load("pretrain_model/onnx_model/MobilenetV1.onnx")

# Build the Keras model from the ONNX graph; 'input.1' is the graph's input
# tensor name. With change_ordering=True the resulting summary reports an
# (8, 24, 1) input, i.e. channels last.
model = onnx_to_keras(
    onnx_model,
    ['input.1'],
    name_policy='renumerate',
    verbose=False,
    change_ordering=True,
)
model.summary()

2023-06-13 11:21:53.044613: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-13 11:21:53.243317: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/add

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input.1 (InputLayer)        [(None, 8, 24, 1)]        0         
                                                                 
 LAYER_0_pad (ZeroPadding2D)  (None, 10, 26, 1)        0         
                                                                 
 LAYER_0 (Conv2D)            (None, 4, 12, 32)         320       
                                                                 
 LAYER_1 (Activation)        (None, 4, 12, 32)         0         
                                                                 
 LAYER_2_pad (ZeroPadding2D)  (None, 6, 14, 32)        0         
                                                                 
 LAYER_2 (DepthwiseConv2D)   (None, 4, 12, 32)         320       
                                                                 
 LAYER_3 (Activation)        (None, 4, 12, 32)         0     

## Quantization aware training

In [4]:
from sklearn.model_selection import train_test_split

from utils.ICE_lab_data_preprocessing import ICE_lab_data_preprocessing as utils

# Wrap the converted Keras model for quantization-aware training.
quantize_model = tfmot.quantization.keras.quantize_model
q_aware_model = quantize_model(model)
# NOTE(review): from_logits=True assumes the model's last layer emits raw
# scores rather than softmax probabilities - confirm against the model.
q_aware_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Load samples and labels from the trimmed training recordings.
data,label,num_classes = utils().extra_data("data/Training_Trimmed")

# Hold out 33% of the samples for evaluation; the fixed seed keeps the split
# reproducible.
training_data, testing_data, training_label, testing_label = train_test_split(data, label, test_size=0.33, random_state=42)

# Reshape to the (8, 24, 1) channels-last layout of the Keras model input.
training_data = training_data.reshape(-1,8,24,1)
testing_data = testing_data.reshape(-1,8,24,1)
# NOTE(review): the two splits are normalized by separate calls; if
# NormalizeData derives its statistics from its argument, train and test end
# up scaled differently - confirm.
training_data = utils().NormalizeData(training_data)
testing_data = utils().NormalizeData(testing_data)

# Short fine-tuning run of the quantization-aware model.
q_aware_model.fit(training_data,training_label,
                  batch_size=1000, epochs=2)
_, q_aware_model_accuracy = q_aware_model.evaluate(
   testing_data, testing_label, batch_size=1000,verbose=True)
print('Quant test accuracy:', q_aware_model_accuracy)




Processing Files: 100%|██████████| 5/5 [00:00<00:00,  8.06it/s]
Processing Files: 100%|██████████| 5/5 [00:00<00:00,  6.87it/s]
Processing Files: 100%|██████████| 5/5 [00:00<00:00,  6.24it/s]
Processing Files: 100%|██████████| 5/5 [00:00<00:00,  5.78it/s]
Processing Files: 100%|██████████| 5/5 [00:00<00:00,  5.10it/s]
Processing Files: 100%|██████████| 5/5 [00:01<00:00,  4.84it/s]
Processing Files: 100%|██████████| 5/5 [00:01<00:00,  4.33it/s]
Processing Files: 100%|██████████| 5/5 [00:01<00:00,  3.53it/s]
Processing Files: 100%|██████████| 8/8 [00:07<00:00,  1.05it/s]


Epoch 1/2
Epoch 2/2
Quant test accuracy: 0.970863401889801


In [6]:
# Write the quantization-aware model to disk; the log below shows assets
# being written under this directory.
saved_model_dir = "pretrain_model/q_ware_model"
q_aware_model.save(saved_model_dir)

2023-06-13 11:23:43.168834: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,128,1,1]
	 [[{{node inputs}}]]
2023-06-13 11:23:43.171245: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,128,1,1]
	 [[{{node inputs}}]]
2023-06-13 11:23:44.109857: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,128,1,1]
	 [[{{node inputs}}]]
2023

INFO:tensorflow:Assets written to: pretrain_model/q_ware_model/assets


INFO:tensorflow:Assets written to: pretrain_model/q_ware_model/assets


# Convert it to Tensorflow Lite Model

In [10]:
import numpy as np
import os
def representative_dataset(num_samples=100):
    """Yield calibration samples for the TFLite converter.

    Loads ``representive_data.npy`` from the working directory and yields up
    to ``num_samples`` of its leading entries. Each entry is reshaped to
    ``(1, 8, 24, 1)``, cast to float32, and wrapped in a one-element list.

    Args:
        num_samples: Maximum number of samples to yield. The count is capped
            at the number of entries in the file; values <= 0 yield nothing.

    Yields:
        list: A single-element list holding one float32 array of shape
        (1, 8, 24, 1).
    """
    data = np.load("representive_data.npy")
    # A single sample gives the converter very little to calibrate on, so
    # walk over a batch of leading entries instead of only the first one.
    # NOTE(review): samples are yielded as stored; training inputs went
    # through NormalizeData - confirm this file is already normalized.
    for sample in data[:max(num_samples, 0)]:
        yield [sample.reshape(1, 8, 24, 1).astype(np.float32)]

import tensorflow as tf

# Convert the saved quantization-aware model into a TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model("pretrain_model/q_ware_model")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
# Restrict the converted graph to the int8 builtin op set.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Keep float32 tensors at the model boundary.
converter.inference_input_type = tf.float32  # or tf.uint8
converter.inference_output_type = tf.float32

tflite_model = converter.convert()

# Save the model. Create the output directory first so the write cannot fail
# on a missing folder after the conversion has already run.
tflite_path = "pretrain_model/tf_lite_model/mobilenetv1.tflite"
os.makedirs(os.path.dirname(tflite_path), exist_ok=True)
with open(tflite_path, 'wb') as f:
    f.write(tflite_model)

2023-06-13 11:25:01.663078: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2023-06-13 11:25:01.663097: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-06-13 11:25:01.663224: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: pretrain_model/q_ware_model
2023-06-13 11:25:01.667615: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }
2023-06-13 11:25:01.667628: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: pretrain_model/q_ware_model
2023-06-13 11:25:01.681657: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-06-13 11:25:01.793021: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: pretrain_model/q_ware_model
2023-06-13 11:25:01.826286: I tensorflow/cc/saved_model/loader.cc:314] SavedModel load for tags { serve }; 

# Simulate Model Accuracy

In [11]:
import numpy as np
import tensorflow as tf
import os

# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path="pretrain_model/tf_lite_model/mobilenetv1.tflite")
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_shape = input_details[0]['shape']

# Tensor indices do not change between invocations, so look them up once.
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Each representive_data{j}.npy file is scored against class index j-1.
# NOTE(review): samples are fed as loaded; training inputs went through
# NormalizeData - confirm these files are already normalized.
for j in range(2,9):
    ori_input_data = np.load(f"representive_data{j}.npy")
    ori_input_data = ori_input_data.astype(np.float32)
    total = ori_input_data.shape[0]
    correct = 0
    print("Total Sample Size:",total)
    for i in range(total):
        # One sample per invocation, shaped to the model's (1, 8, 24, 1) input.
        input_data = ori_input_data[i].reshape(-1,8,24,1)
        interpreter.set_tensor(input_index, input_data)

        interpreter.invoke()

        # get_tensor() returns a copy of the tensor data
        # use tensor() in order to get a pointer to the tensor
        output_data = interpreter.get_tensor(output_index)
        if np.argmax(output_data) == j-1:
            correct += 1
    print("Prediction Correct Size:",correct)
    # An empty file has no defined accuracy; report NaN instead of dividing
    # by zero.
    accuracy = round(correct/total,2) if total else float('nan')
    print("Accuracy",accuracy)

Total Sample Size: 30720
Prediction Correct Size: 28471
Accuracy 0.93
Total Sample Size: 30720
Prediction Correct Size: 29850
Accuracy 0.97
Total Sample Size: 30720
Prediction Correct Size: 27100
Accuracy 0.88
Total Sample Size: 30720
Prediction Correct Size: 29913
Accuracy 0.97
Total Sample Size: 30720
Prediction Correct Size: 34
Accuracy 0.0
Total Sample Size: 30720
Prediction Correct Size: 26710
Accuracy 0.87
Total Sample Size: 30880
Prediction Correct Size: 8203
Accuracy 0.27
