In [None]:
!pip install onnx

Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.17.0


In [None]:
!pip install tensorrt==8.6.1

Collecting tensorrt==8.6.1
  Downloading tensorrt-8.6.1.tar.gz (16 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: tensorrt
  Building wheel for tensorrt (setup.py) ... [?25l[?25hdone
  Created wheel for tensorrt: filename=tensorrt-8.6.1-py2.py3-none-any.whl size=16972 sha256=ed2d19f8251c444b1fe305ae2720ab65c4b0078643493cb723df49702878a56e
  Stored in directory: /root/.cache/pip/wheels/6d/29/56/abdffd4c604f255b5254bef3f1c598ab7811ea020540599438
Successfully built tensorrt
Installing collected packages: tensorrt
Successfully installed tensorrt-8.6.1


In [None]:
!pip install pycuda

Collecting pycuda
  Downloading pycuda-2024.1.2.tar.gz (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m58.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.14-py3-none-any.whl.metadata (3.0 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading pytools-2024.1.14-py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m8.1 MB/s[0m eta 

In [None]:
import torch
from torchvision import models, transforms
from PIL import Image
import onnx
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np

# Load the pretrained MobileNetV2 classifier.
# The boolean `pretrained=True` flag is deprecated in torchvision; the explicit
# IMAGENET1K_V1 weights enum selects the same checkpoint the legacy flag loaded.
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
# Switch to evaluation mode before inference / export.
model.eval()

# Image preprocessing pipeline: Resize(256) -> CenterCrop(224) -> ToTensor ->
# per-channel Normalize with the mean / std values below.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
preprocess = transforms.Compose(_preprocess_steps)

# Load the image and run it through the preprocessing pipeline.
# The image is forced to 3-channel RGB because the Normalize step is configured
# with three mean/std values; grayscale, RGBA or palette files would otherwise
# fail (or be normalised incorrectly). The `with` block closes the file handle
# once the pixel data has been copied by convert().
with Image.open('cat.jpg') as image_file:
    img = image_file.convert('RGB')
input_tensor = preprocess(img)
# Add a leading batch dimension and hand the data over as a NumPy array.
input_batch = input_tensor.unsqueeze(0).numpy()

# Export the model to an ONNX file, using the preprocessed image batch as the
# example input for the exporter.
onnx_example_input = torch.from_numpy(input_batch)
torch.onnx.export(
    model,
    onnx_example_input,
    "mobilenetv2.onnx",
    opset_version=11,
    export_params=True,
    verbose=True,
)

# Create the TensorRT logger (warnings and above) and the builder.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)

# Create the network definition with the EXPLICIT_BATCH creation flag set,
# and attach an ONNX parser that populates it.
network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(network_flags)
parser = trt.OnnxParser(network, TRT_LOGGER)

# Parse the exported ONNX model; on failure print every parser error and abort.
with open("mobilenetv2.onnx", "rb") as onnx_file:
    parsed_ok = parser.parse(onnx_file.read())
if not parsed_ok:
    for error_idx in range(parser.num_errors):
        print(parser.get_error(error_idx))
    raise RuntimeError("Failed to parse ONNX file.")

# Configure the builder.
config = builder.create_builder_config()
# `config.max_workspace_size` is deprecated (it emits a DeprecationWarning);
# the memory-pool API sets the same 1 GiB workspace limit.
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1GB

# Enable FP16 kernels when the platform reports fast FP16 support.
if builder.platform_has_fast_fp16:
    config.set_flag(trt.BuilderFlag.FP16)

# Build the engine directly in serialized form. `builder.build_engine` is
# deprecated; `build_serialized_network` returns the plan bytes (or None on
# failure), which is exactly what gets written to disk below.
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
    raise RuntimeError("Failed to build the engine.")

# Save the serialized engine (plan) to disk.
with open("mobilenetv2.trt", "wb") as f:
    f.write(serialized_engine)

# Load the serialized TensorRT engine from disk.
runtime = trt.Runtime(TRT_LOGGER)
with open("mobilenetv2.trt", "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())
# Deserialization reports failure by returning None; fail here with a clear
# message instead of an AttributeError on the next line.
if engine is None:
    raise RuntimeError("Failed to deserialize the engine.")

# Create the execution context used to run inference.
context = engine.create_execution_context()

# Allocate one page-locked host buffer and one device buffer per engine I/O
# tensor. `bindings` collects the device pointers in I/O-tensor order, which is
# the order `execute_async_v2` expects.
#
# The name-based tensor API replaces the deprecated binding-index calls
# (get_binding_index / get_binding_shape / get_binding_dtype /
# binding_is_input). The former multiplication by `engine.max_batch_size` is
# dropped: the network is created with EXPLICIT_BATCH, so the batch dimension is
# already part of each tensor shape.
inputs, outputs, bindings, stream = [], [], [], cuda.Stream()
for tensor_idx in range(engine.num_io_tensors):
    tensor_name = engine.get_tensor_name(tensor_idx)
    size = trt.volume(engine.get_tensor_shape(tensor_name))
    dtype = trt.nptype(engine.get_tensor_dtype(tensor_name))
    host_mem = cuda.pagelocked_empty(size, dtype)
    device_mem = cuda.mem_alloc(host_mem.nbytes)
    bindings.append(int(device_mem))
    buffer_pair = {'host': host_mem, 'device': device_mem}
    if engine.get_tensor_mode(tensor_name) == trt.TensorIOMode.INPUT:
        inputs.append(buffer_pair)
    else:
        outputs.append(buffer_pair)

# Stage the input: copy the image data INTO the pre-allocated page-locked host
# buffer. Rebinding inputs[0]['host'] to `input_batch.ravel()` would discard the
# pinned buffer and make the "async" upload read from ordinary pageable memory.
# np.copyto also raises if the element counts do not match.
np.copyto(inputs[0]['host'], input_batch.ravel())

# Copy the input from host to device on the stream.
cuda.memcpy_htod_async(inputs[0]['device'], inputs[0]['host'], stream)

# Enqueue inference on the same stream.
context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

# Copy the output from device back to host on the stream.
cuda.memcpy_dtoh_async(outputs[0]['host'], outputs[0]['device'], stream)

# Block until the upload, inference and download have all completed.
stream.synchronize()

# Post-process: view the flat output as (1, num_values) and take the arg-max.
output_data = np.array(outputs[0]['host']).reshape(1, -1)
predicted_class = np.argmax(output_data, axis=1)

print('预测类别索引:', predicted_class[0])

  config.max_workspace_size = 1 << 30  # 1GB
  engine = builder.build_engine(network, config)


预测类别索引: 285


  binding_idx = engine.get_binding_index(binding)
  size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
  size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
  dtype = trt.nptype(engine.get_binding_dtype(binding))
  if engine.binding_is_input(binding):


In [None]:
# Run the `nvidia-smi` shell command to show the GPU / driver status of this runtime.
!nvidia-smi

Thu Oct 31 08:39:11 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    