In [3]:
import os
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import onnx
import onnxsim
import cv2
import re
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

# Size, in pixels, that every dataset image is resized to before it is used.
IMG_WIDTH = 45
IMG_HEIGHT = 45

In [4]:
class GesturePredictor(nn.Module):
    """Small CNN that maps a single-channel 45x45 image to gesture-class logits.

    Three unpadded 3x3 convolutions (64 filters each), each followed by ReLU
    and a 2x2 max pool, shrink a 1x45x45 input to 64x3x3 = 576 features. Two
    fully connected layers then turn those features into one logit per class.

    Layer attribute names and their registration order are part of the
    checkpoint format (``state_dict`` keys), so they must not be renamed.

    Args:
        num_classes: Number of output logits. Defaults to 8.
        dropout_p: Dropout probability applied between the two fully
            connected layers. Defaults to 0.20.
    """

    def __init__(self, num_classes=8, dropout_p=0.20):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=0)
        # A single pooling module is reused after every convolution; it has no weights.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=0)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=0)
        # 576 = 64 channels * 3 * 3, the flattened size produced by a 45x45 input.
        self.fc1 = nn.Linear(576, 128)
        self.dropout = nn.Dropout(dropout_p)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits of shape (batch, num_classes).

        Args:
            x: Float tensor of shape (batch, 1, 45, 45).
        """
        # Convolution -> ReLU -> max pool, three times over.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        # Keep the batch dimension and flatten everything else for the linear layers.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

In [6]:
# Rebuild the architecture and restore the trained weights on the CPU.
model = GesturePredictor()
# The model is only used for inference/export from here on, so switch to
# evaluation mode to make sure dropout is disabled.
model.eval()
# Kept as the last expression so the cell still displays the key-matching report.
model.load_state_dict(
    torch.load(
        'handrecognition_model3.pth',
        weights_only=True,
        map_location=torch.device('cpu'),
    )
)

<All keys matched successfully>

In [7]:
from torchsummary import summary

# Print per-layer output shapes and parameter counts for one 1x45x45 input.
summary_input_size = (1, 45, 45)
summary(model, input_size=summary_input_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 43, 43]             640
         MaxPool2d-2           [-1, 64, 21, 21]               0
            Conv2d-3           [-1, 64, 19, 19]          36,928
         MaxPool2d-4             [-1, 64, 9, 9]               0
            Conv2d-5             [-1, 64, 7, 7]          36,928
         MaxPool2d-6             [-1, 64, 3, 3]               0
            Linear-7                  [-1, 128]          73,856
           Dropout-8                  [-1, 128]               0
            Linear-9                    [-1, 8]           1,032
Total params: 149,384
Trainable params: 149,384
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 1.36
Params size (MB): 0.57
Estimated Total Size (MB): 1.94
-------------------------------------------

In [None]:
# Export the model to ONNX.
# ESP-DL currently only supports batch size = 1, hence the leading 1 in the dummy input.
onnx_path = "handrecognition_model3.onnx"
dummy_input = torch.randn(1, 1, 45, 45, dtype=torch.float32)
torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    input_names=["input"],
    output_names=["output"],
    opset_version=13,
)

# Validate the exported graph, simplify it, and overwrite the file with the simplified graph.
onnx_model = onnx.load_model(onnx_path)
onnx.checker.check_model(onnx_model)
onnx_model, check = onnxsim.simplify(onnx_model)
assert check, "Simplified ONNX model could not be validated"
onnx.save(onnx_model, onnx_path)

In [11]:
def generate_data(path=None):
    """Load the grayscale gesture images and their class labels from disk.

    Images are read from ``<path>/my_dataset_blackwhite/<NN>_<name>/``, where
    ``NN`` is a two-digit, 1-based class number (e.g. ``01_fist``). Entries
    that are not directories, or whose name does not start with ``NN_``, are
    skipped. Files that OpenCV cannot decode are skipped as well.

    Args:
        path: Directory that contains ``my_dataset_blackwhite``. When omitted,
            the original hard-coded project directory is used.

    Returns:
        A tuple ``(X, Y)``:
        X: float32 array of shape (N, 1, IMG_HEIGHT, IMG_WIDTH). Pixel values
           are only cast to float32; no rescaling is applied here.
        Y: int64 array of shape (N,) holding 0-based class labels (NN - 1).
    """
    if path is None:
        # Raw string so that backslashes such as "\h" and "\V" are not parsed
        # as (invalid) escape sequences; the resulting path is unchanged.
        path = r"D:\Onedrive\hdd portable\vaulthor\VaulThor\2 Projects\thesis\esp_dl_1\esp-dl\examples\tutorial\how_to_quantize_model\quantize_my_model"

    base_path = os.path.join(path, 'my_dataset_blackwhite')
    folder_pattern = re.compile(r'(\d{2})_')
    data = []
    labels = []

    # Load and preprocess images, one class folder at a time.
    for folder in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, folder)
        match = folder_pattern.match(folder)
        # Ignore stray files and folders that do not follow the "NN_name" scheme
        # instead of crashing on a failed regex match.
        if match is None or not os.path.isdir(folder_path):
            continue
        print(folder)
        label = int(match.group(1)) - 1  # folder numbers are 1-based, labels 0-based

        # Sorted so the sample order (and therefore any seeded split) is reproducible.
        for img_name in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Read in grayscale
            if img is None:
                continue  # not an image OpenCV can decode
            # cv2.resize takes (width, height) and returns an array of shape (height, width).
            data.append(cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT)))
            labels.append(label)

    # Convert to numpy arrays (dtype cast only; values are not normalised).
    X = np.array(data, dtype=np.float32)
    Y = np.array(labels, dtype=np.int64)

    # Add the channel dimension: (N, H, W) -> (N, 1, H, W).
    X = X.reshape(-1, 1, IMG_HEIGHT, IMG_WIDTH)
    return X, Y

In [12]:
# Load the whole dataset, then hold out 20% of it as the validation split.
# The fixed random_state keeps the split identical between runs.
X, Y = generate_data()
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.2,
)

01_fist
02_up
03_down
04_left
05_right
06_forward
07_backward
08_nothing


In [3]:
import os

# protobuf picks its backend from the process environment, so the setting has
# to go into os.environ; a plain Python variable of the same name has no effect.
# This is the workaround attempted for the
# "duplicate file name caffe.proto" TypeError raised when importing ppq.
# NOTE(review): the variable is presumably only honoured if it is set before
# google.protobuf is first imported (onnx imports it too) - if the error
# persists, restart the kernel and run this cell before the other imports.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
from ppq.api import espdl_quantize_onnx

TypeError: Couldn't build proto file into descriptor pool: duplicate file name caffe.proto

In [None]:
ONNX_MODEL_PATH = "handrecognition_model3.onnx"
ESPDL_MODEL_PATH = "handrecognition_model3.espdl"
INPUT_SHAPE = [1, 1, 45, 45]  # (batch, channels, height, width); batch is 1
TARGET = "esp32s3"  # target platform passed to the quantizer
NUM_OF_BITS = 8  # quantization bit width
DEVICE = "cpu"  # 'cuda' or 'cpu', if you use cuda, please make sure that cuda is available


def collate_fn(batch):
    """Turn one DataLoader batch into the tensor fed to the model for calibration.

    The DataLoader below yields ``(images, labels)`` pairs; only the images
    are needed, moved to the calibration device.
    """
    return batch[0].to(DEVICE)


# Calibrate on the validation split.
x = torch.tensor(X_val)
y = torch.tensor(Y_val)

# The dataloader's shuffle must be False: the dataset is traversed several
# times while the quantization error is computed, and shuffling between
# passes would yield wrong error figures.
dataset = TensorDataset(x, y)
dataloader = DataLoader(dataset, batch_size=128, shuffle=False)

quant_ppq_graph = espdl_quantize_onnx(
    onnx_import_file=ONNX_MODEL_PATH,
    espdl_export_file=ESPDL_MODEL_PATH,
    calib_dataloader=dataloader,
    calib_steps=128,  # number of calibration steps
    input_shape=INPUT_SHAPE,  # input shape, with a batch of 1
    inputs=None,
    target=TARGET,  # target platform
    num_of_bits=NUM_OF_BITS,  # quantization bit width
    collate_fn=collate_fn,
    dispatching_override=None,
    device=DEVICE,
    error_report=True,
    skip_export=False,
    export_test_values=True,
    verbose=1,  # print detailed log output
)