In [1]:
import torch
from nanodet.util import cfg, load_config, load_model_weight
from nanodet.model.arch import build_model

cfg_path = "config/nanodet-plus-m_416.yml"
load_config(cfg, cfg_path)

model = build_model(cfg.model)

ckpt = torch.load("data/nanodet-plus-m_416.pth", map_location="cpu")
logger = None  # nếu cần log thì khởi tạo Logger
load_model_weight(model, ckpt, logger=None)

model.eval()


  from .autonotebook import tqdm as notebook_tqdm


model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


NanoDetPlus(
  (backbone): ShuffleNetV2(
    (conv1): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.1, inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (stage2): Sequential(
      (0): ShuffleV2Block(
        (branch1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (3): BatchNorm2d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (4): LeakyReLU(negative_slope=0.1, inplace=True)
        )
        (branch2): Sequential(
          (0): Conv2d(24, 58, kernel_size=(1, 1),

In [2]:
from torchviz import make_dot
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
x = torch.randn(1, 3, 416, 416).to(device)
y = model(x)

if isinstance(y, tuple):
    y = y[0]

make_dot(y, params=dict(model.named_parameters())).render("model_visualization/model", format="png")


'model_visualization/model.png'

In [3]:
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
x = torch.randn(1, 3, 416, 416).to(next(model.parameters()).device)
output = model(x)
print("Input Shape: ", x.shape)
print("Output Shape: ", output.shape if isinstance(output, torch.Tensor) else [o.shape for o in output])


Input Shape:  torch.Size([1, 3, 416, 416])
Output Shape:  torch.Size([1, 3598, 112])


In [4]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
# Giả sử model là mô hình của bạn, và device là 'cpu' hoặc 'cuda'
x = torch.randn(1, 3, 416, 416).to(next(model.parameters()).device)
output = model(x)

# Kiểm tra input shape và output shape
print("Input Shape: ", x.shape)
print("Output Shape: ", output.shape if isinstance(output, torch.Tensor) else [o.shape for o in output])


Input Shape:  torch.Size([1, 3, 416, 416])
Output Shape:  torch.Size([1, 3598, 112])


In [5]:
from torchinfo import summary

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

summary(model, input_size=(1, 3, 416, 416), device=device)


Layer (type:depth-idx)                                       Output Shape              Param #
NanoDetPlus                                                  [1, 3598, 112]            3,132,980
├─ShuffleNetV2: 1-1                                          [1, 116, 52, 52]          --
│    └─Sequential: 2-1                                       [1, 24, 208, 208]         --
│    │    └─Conv2d: 3-1                                      [1, 24, 208, 208]         648
│    │    └─BatchNorm2d: 3-2                                 [1, 24, 208, 208]         48
│    │    └─LeakyReLU: 3-3                                   [1, 24, 208, 208]         --
│    └─MaxPool2d: 2-2                                        [1, 24, 104, 104]         --
│    └─Sequential: 2-3                                       [1, 116, 52, 52]          --
│    │    └─ShuffleV2Block: 3-4                              [1, 116, 52, 52]          7,398
│    │    └─ShuffleV2Block: 3-5                              [1, 116, 52, 52]       

In [6]:
class WrapperModel(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        out = self.model(x)
        # Nếu out là tuple, chỉ lấy phần tử đầu tiên hoặc phần cần thiết
        if isinstance(out, tuple):
            return out[0]
        return out


In [12]:
import torch
import torch.quantization

# Giả sử model đã được load và chuẩn bị sẵn
model.eval()  # Chuyển mô hình về chế độ đánh giá

# Áp dụng lượng tử hóa trước khi kiểm tra
model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.quantization.prepare(model, inplace=True)

# Thực hiện một số bước inference để mô hình chuẩn bị lượng tử hóa
# Giả sử bạn có một hàm inference, bạn có thể sử dụng nó ở đây để huấn luyện mô hình
# Trên bộ dữ liệu kiểm thử hoặc một batch bất kỳ


NanoDetPlus(
  (backbone): ShuffleNetV2(
    (conv1): Sequential(
      (0): QuantizedConv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), scale=0.02908726967871189, zero_point=66, padding=(1, 1), bias=False)
      (1): QuantizedBatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): QuantizedLeakyReLU(negative_slope=0.1, inplace=True)
    )
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (stage2): Sequential(
      (0): ShuffleV2Block(
        (branch1): Sequential(
          (0): QuantizedConv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), scale=0.002110641449689865, zero_point=75, padding=(1, 1), groups=24, bias=False)
          (1): QuantizedBatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): QuantizedConv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), scale=0.006850589998066425, zero_point=75, bias=False)
          (3): QuantizedBatchNorm2d(58, eps=1e-05, momentum=0.

In [11]:
import time

# Đo thời gian trước khi chuyển đổi
start_time = time.time()
model(dummy_input)
print(f"Time for FP32: {time.time() - start_time:.4f} seconds")

# Đo thời gian sau khi chuyển đổi sang INT8
start_time = time.time()
model(dummy_input)
print(f"Time for INT8: {time.time() - start_time:.4f} seconds")


NotImplementedError: Could not run 'quantized::conv2d.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d.new' is only available for these backends: [QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].

QuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv.cpp:1449 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:140 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:488 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:291 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:18 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:64 [backend fallback]
AutogradOther: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:35 [backend fallback]
AutogradCPU: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:39 [backend fallback]
AutogradCUDA: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:47 [backend fallback]
AutogradXLA: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:51 [backend fallback]
AutogradMPS: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:59 [backend fallback]
AutogradXPU: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:43 [backend fallback]
AutogradHPU: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:68 [backend fallback]
AutogradLazy: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:55 [backend fallback]
Tracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:296 [backend fallback]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:482 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:324 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:743 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at ../aten/src/ATen/BatchingRegistrations.cpp:1064 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:189 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:148 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:484 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:144 [backend fallback]
