In [1]:
import os
from datetime import datetime as dt
from glob import glob

# import calibrator
import cv2
import numpy as np
import tensorrt as trt
import torch
import torch.nn.functional as F
from cuda import cudart
from torch.autograd import Variable

# 1. 数据处理

下载 MNIST 数据集的 .gz 压缩包（图片会在下一步中由压缩包转换生成）

In [11]:
# Run the repository's download script (path is relative to this notebook) to fetch the MNIST .gz archives.
! bash ../data/download_mnist.sh

--2023-05-27 01:48:37--  http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Resolving yann.lecun.com (yann.lecun.com)... 172.67.171.76, 104.21.29.36, 2606:4700:3036::ac43:ab4c, ...
Connecting to yann.lecun.com (yann.lecun.com)|172.67.171.76|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9912422 (9.5M) [application/x-gzip]
Saving to: ‘./MNIST/train-images-idx3-ubyte.gz’


2023-05-27 01:48:42 (2.34 MB/s) - ‘./MNIST/train-images-idx3-ubyte.gz’ saved [9912422/9912422]

--2023-05-27 01:48:42--  http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Resolving yann.lecun.com (yann.lecun.com)... 104.21.29.36, 172.67.171.76, 2606:4700:3034::6815:1d24, ...
Connecting to yann.lecun.com (yann.lecun.com)|104.21.29.36|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28881 (28K) [application/x-gzip]
Saving to: ‘./MNIST/train-labels-idx1-ubyte.gz’


2023-05-27 01:48:43 (1.40 MB/s) - ‘./MNIST/train-labels-idx1-ubyte.gz’ saved [28881/28881]

导入 LoadMnistData，将已下载好的 .gz 文件解压并转换成图片

In [13]:
# Project-local helper module that reads the downloaded MNIST archives.
import LoadMnistData

# How many images to export for each split (upper bounds: 60000 train / 10000 test).
nTrain = 6000 # <= 60000
nTest = 1000 # <= 10000

# Read the archives under ../data/MNIST/ and write the first nTrain / nTest
# samples out as image files into the train/ and test/ sub-folders.
# NOTE(review): the third saveImage argument presumably selects the training
# split (True) or the test split (False) — confirm against LoadMnistData.
mnist = LoadMnistData.MnistData("../data/MNIST/", isOneHot=False)
mnist.saveImage(nTrain, "../data/MNIST/train/", True)  # 60000 images in total
mnist.saveImage(nTest, "../data/MNIST/test/", False)  # 10000 images in total

Extracting ../data/MNIST/train-images-idx3-ubyte.gz
Extracting ../data/MNIST/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST/t10k-images-idx3-ubyte.gz
Extracting ../data/MNIST/t10k-labels-idx1-ubyte.gz


# 2. 创建并训练模型

In [4]:
# Training parameters
# Number of samples per mini-batch handed to the DataLoaders.
BATCH_SIZE = 128
# Root folder that holds the train/ and test/ image sub-folders.
DATA_PATH = "../data/MNIST/"

注意：这里的模型有两个输出。y 是最后一层全连接层（fc2）输出节点的值，形状为 [N, 10]；z 是对 y 做 softmax 后取 argmax 得到的预测类别 (0, 1, ..., 9)，形状为 [N]。

In [5]:
# Build the model
class Net(torch.nn.Module):
    """Small CNN classifier: two conv + max-pool stages followed by two linear layers.

    ``forward`` returns a pair ``(y, z)`` where ``y`` holds the ten raw output
    values of the last linear layer and ``z`` is the index of the largest
    soft-max probability along dimension 1.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 5x5 kernels with a 2-pixel padding leave the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        # The flattened feature size fed to fc1 is 64 channels of 7x7.
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=10)

    def forward(self, x):
        # Stage 1 and 2: convolution -> ReLU -> 2x2 max pooling.
        pooled1 = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        pooled2 = F.max_pool2d(F.relu(self.conv2(pooled1)), kernel_size=2)
        # Flatten every sample to a vector before the fully connected layers.
        flat = pooled2.reshape(-1, 64 * 7 * 7)
        hidden = F.relu(self.fc1(flat))
        y = self.fc2(hidden)
        # The soft-max is kept in front of the arg-max so both ops appear in the graph.
        probs = F.softmax(y, dim=1)
        z = torch.argmax(probs, dim=1)
        return y, z

In [6]:
# Build the dataset
class MyData(torch.utils.data.Dataset):
    """Image-folder dataset yielding ``(image, one-hot label)`` tensor pairs.

    Args:
        datapath: Root folder containing ``train/`` and ``test/`` sub-folders
            of ``.jpg`` files. A trailing path separator is optional.
        isTrain: Use the ``train/`` split when true, the ``test/`` split otherwise.

    Each item is a ``float32`` tensor of shape ``(1, 28, 28)`` together with a
    ``float32`` one-hot vector of length 10.
    """

    def __init__(self, datapath, isTrain=True):
        subDir = "train" if isTrain else "test"
        # os.path.join accepts datapath with or without a trailing separator;
        # plain string concatenation silently matched no files without one.
        # Only the requested split is listed, and sorting keeps the order stable.
        self.data = sorted(glob(os.path.join(datapath, subDir, "*.jpg")))

        self.nHeight = 28
        self.nWidth = 28

    def __getitem__(self, index):
        """Load the ``index``-th image and build its one-hot label.

        Raises:
            IOError: If the image file cannot be read.
        """
        imageName = self.data[index]
        image = cv2.imread(imageName, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise IOError("Failed to read image: %s" % imageName)

        # The class digit is the 7th character from the end of the file path.
        # NOTE(review): presumably this matches the file naming produced by
        # LoadMnistData.saveImage — confirm before changing the naming scheme.
        digit = int(imageName[-7])
        label = np.zeros(10, dtype=np.float32)
        label[digit] = 1

        pixels = image.reshape(1, self.nHeight, self.nWidth).astype(np.float32)
        return torch.from_numpy(pixels), torch.from_numpy(label)

    def __len__(self):
        """Return the number of image files found for the selected split."""
        return len(self.data)

In [7]:
# Model on the GPU, cross-entropy loss and Adam optimizer.
model = Net().cuda()
ceLoss = torch.nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001)

# Datasets for both splits; only the training loader shuffles its samples.
trainDataset = MyData(datapath=DATA_PATH, isTrain=True)
testDataset = MyData(datapath=DATA_PATH, isTrain=False)
trainLoader = torch.utils.data.DataLoader(
    dataset=trainDataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
testLoader = torch.utils.data.DataLoader(
    dataset=testDataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [8]:
def train(model, trainLoader, testLoader, opt, ceLoss, epoch):
    """Train ``model`` and print the test accuracy after every epoch.

    Args:
        model: Module whose forward pass returns ``(y, z)``: the class scores
            and the predicted class indices.
        trainLoader: Iterable of ``(inputs, one-hot labels)`` training batches.
        testLoader: Iterable of ``(inputs, one-hot labels)`` evaluation batches.
        opt: Optimizer bound to the parameters of ``model``.
        ceLoss: Loss callable applied to ``(y, one-hot labels)``.
        epoch: Number of passes over ``trainLoader``.

    Returns:
        None. Progress and the final accuracy are reported through ``print``.
        The model is left in evaluation mode.
    """
    # Use the device the model already lives on instead of assuming "cuda:0".
    firstParam = next(model.parameters(), None)
    if firstParam is not None:
        device = firstParam.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Stay defined (and printable as NaN) when there are no epochs or no batches.
    lastLoss = None
    testAcc = float("nan")

    for epochIndex in range(epoch):
        model.train()
        for xTrain, yTrain in trainLoader:
            xTrain = xTrain.to(device)
            yTrain = yTrain.to(device)
            opt.zero_grad()
            y_, _ = model(xTrain)
            lastLoss = ceLoss(y_, yTrain)
            lastLoss.backward()
            opt.step()

        model.eval()
        nCorrect = 0
        nSample = 0
        with torch.no_grad():
            for xTest, yTest in testLoader:
                xTest = xTest.to(device)
                yTest = yTest.to(device)
                _, z = model(xTest)
                # For one-hot labels the arg-max is the class index.
                nCorrect += (z == torch.argmax(yTest, dim=1)).sum().item()
                nSample += xTest.shape[0]

        testAcc = nCorrect / nSample if nSample > 0 else float("nan")
        # The reported loss is that of the last training batch seen so far.
        lossValue = lastLoss.item() if lastLoss is not None else float("nan")
        print("%s, epoch %2d, loss = %f, test acc = %f" % (dt.now(), epochIndex + 1, lossValue, testAcc))

    print("Succeeded building model in pyTorch!")
    print("test acc = %f" % testAcc)

In [9]:
# Train for 25 epochs; train() prints the loss and the test accuracy after each epoch.
train(model, trainLoader, testLoader, opt, ceLoss, 25)

2023-05-26 13:09:19.062010, epoch  1, loss = 0.436704, test acc = 0.875000
2023-05-26 13:09:19.417785, epoch  2, loss = 0.158914, test acc = 0.917000
2023-05-26 13:09:19.752828, epoch  3, loss = 0.223163, test acc = 0.927000
2023-05-26 13:09:20.078149, epoch  4, loss = 0.114237, test acc = 0.927000
2023-05-26 13:09:20.411632, epoch  5, loss = 0.040407, test acc = 0.940000
2023-05-26 13:09:20.738097, epoch  6, loss = 0.028230, test acc = 0.950000
2023-05-26 13:09:21.077965, epoch  7, loss = 0.043541, test acc = 0.949000
2023-05-26 13:09:21.414767, epoch  8, loss = 0.039208, test acc = 0.956000
2023-05-26 13:09:21.748811, epoch  9, loss = 0.007606, test acc = 0.948000
2023-05-26 13:09:22.075274, epoch 10, loss = 0.039192, test acc = 0.951000
2023-05-26 13:09:22.406398, epoch 11, loss = 0.015826, test acc = 0.955000
2023-05-26 13:09:22.768756, epoch 12, loss = 0.005088, test acc = 0.953000
2023-05-26 13:09:23.115798, epoch 13, loss = 0.047944, test acc = 0.946000
2023-05-26 13:09:23.46111

# 3. 模型导出 ONNX


下面逐项说明本节 `torch.onnx.export` 调用中用到的参数：

`model`：要导出的 PyTorch 模型。

`torch.randn(1, 1, nHeight, nWidth, device="cuda")`：模型的输入示例。这里创建了一个随机的 1 x 1 x 28 x 28 张量作为输入，并将其放在 GPU（CUDA 设备）上。

`onnxFile`：要保存 ONNX 文件的路径。

`input_names=["x"]`：指定模型输入的名称，用于在 ONNX 文件中标识输入节点。

`output_names=["y", "z"]`：指定模型输出的名称，用于在 ONNX 文件中标识输出节点。

`do_constant_folding=True`：启用常量折叠优化。这可以简化计算图，提高推理性能。

`verbose=True`：打印详细的模型导出信息。

`keep_initializers_as_inputs=True`：将模型的权重等初始化参数（initializer）同时列为 ONNX 计算图的输入（例如下方导出日志中的 `%conv1.weight`、`%fc1.bias` 等）。部分工具要求权重以这种形式出现，因此有助于提高模型的兼容性。

`opset_version=12`：指定 ONNX 的操作集版本。这可以确保导出的 ONNX 模型与其他框架和工具的兼容性。

`dynamic_axes={"x": {0: "nBatchSize"}, "z": {0: "nBatchSize"}}`：指定模型的动态维度。这里将输入节点 "x" 和输出节点 "z" 的 batch 维度（第 0 维）设置为动态维度，并命名为 "nBatchSize"。注意输出节点 "y" 没有在这里显式声明。

In [10]:
# Input image size and output file locations.
nHeight = 28
nWidth = 28
onnxFile = "./model.onnx"
trtFile = "./model.engine"

# Export with one random 1 x 1 x nHeight x nWidth image on the GPU as the example input.
# The batch axis (axis 0) of input "x" and output "z" is declared dynamic.
torch.onnx.export(
    model,
    torch.randn(1, 1, nHeight, nWidth, device="cuda"),
    onnxFile,
    input_names=["x"],
    output_names=["y", "z"],
    do_constant_folding=True,
    verbose=True,
    keep_initializers_as_inputs=True,
    opset_version=12,
    dynamic_axes={
        "x": {0: "nBatchSize"},
        "z": {0: "nBatchSize"},
    },
)
print("Succeeded converting model into ONNX!")

Exported graph: graph(%x : Float(*, 1, 28, 28, strides=[784, 784, 28, 1], requires_grad=0, device=cuda:0),
      %conv1.weight : Float(32, 1, 5, 5, strides=[25, 25, 5, 1], requires_grad=1, device=cuda:0),
      %conv1.bias : Float(32, strides=[1], requires_grad=1, device=cuda:0),
      %conv2.weight : Float(64, 32, 5, 5, strides=[800, 25, 5, 1], requires_grad=1, device=cuda:0),
      %conv2.bias : Float(64, strides=[1], requires_grad=1, device=cuda:0),
      %fc1.weight : Float(1024, 3136, strides=[3136, 1], requires_grad=1, device=cuda:0),
      %fc1.bias : Float(1024, strides=[1], requires_grad=1, device=cuda:0),
      %fc2.weight : Float(10, 1024, strides=[1024, 1], requires_grad=1, device=cuda:0),
      %fc2.bias : Float(10, strides=[1], requires_grad=1, device=cuda:0)):
  %/conv1/Conv_output_0 : Float(*, 32, 28, 28, strides=[25088, 784, 28, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[5, 5], pads=[2, 2, 2, 2], strides=[1, 1], onnx_name=