In [None]:
# torch 설치
!pip install torch

출처 : https://pytorch.org/docs/stable/quantization.html#quantization-api-summary

# Conv2d Model

In [31]:
import torch
import torch.nn as nn

class SingleConvModel(nn.Module):
    """Minimal network made of a single ``nn.Conv2d`` layer.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Kernel size forwarded unchanged to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        # The only submodule; it is registered under the attribute name "conv".
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
        )

    def forward(self, x):
        """Run ``x`` through the convolution and return the result."""
        return self.conv(x)

# Build the model instance.
in_channels = 1  # number of input image channels (e.g. 3 for an RGB image)
out_channels = 1  # number of output channels
kernel_size = 3  # convolution kernel size (3x3 kernel)

model = SingleConvModel(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
)

In [32]:
# Grab the parameters, then show the module structure followed by them.
model_s = model.state_dict()
print(model, model_s, sep="\n")

SingleConvModel(
  (conv): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
)
OrderedDict([('conv.weight', tensor([[[[-0.1645,  0.2326,  0.1386],
          [-0.2683,  0.2556,  0.0943],
          [-0.2318,  0.0654,  0.0817]]]])), ('conv.bias', tensor([-0.1088]))])


# 동적 양자화

In [33]:
# Request dynamic quantization of `model`: the set names the layer types to
# quantize and qint8 is the target dtype for the quantized weights.
model_int8 = torch.ao.quantization.quantize_dynamic(
    model,
    qconfig_spec={torch.nn.Conv2d},
    dtype=torch.qint8,
)

# Print the returned model and its state_dict so they can be compared with
# the original float model.
model_int8_s = model_int8.state_dict()
print(model_int8, model_int8_s, sep="\n")

SingleConvModel(
  (conv): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
)
OrderedDict([('conv.weight', tensor([[[[-0.1645,  0.2326,  0.1386],
          [-0.2683,  0.2556,  0.0943],
          [-0.2318,  0.0654,  0.0817]]]])), ('conv.bias', tensor([-0.1088]))])


변화 없음: 레이어 유형과 가중치가 그대로이고 추가된 파라미터도 없음. 동적 양자화는 `nn.Linear`, `nn.LSTM` 등 일부 레이어만 지원하고 `nn.Conv2d`는 지원하지 않기 때문으로 보임.

# 정적 양자화

일반적인 방법으로 정의한 모델에는 정적 양자화가 적용되지 않는 것으로 보임.
모델을 정의할 때 QuantStub과 DeQuantStub을 추가해야 함.


In [44]:
import torch

# define a floating point model where some layers could be statically quantized
class M(torch.nn.Module):
    """Floating point model whose layers can be statically quantized.

    Layout: QuantStub -> Conv2d(1, 1, 3) -> ReLU -> DeQuantStub.
    """

    def __init__(self):
        super().__init__()
        # Entry marker: converts tensors from floating point to quantized.
        self.quant = torch.ao.quantization.QuantStub()
        self.conv = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
        self.relu = torch.nn.ReLU()
        # Exit marker: converts tensors from quantized back to floating point.
        self.dequant = torch.ao.quantization.DeQuantStub()

    def forward(self, x):
        """Apply quant stub, convolution, ReLU and dequant stub in that order.

        The two stubs mark by hand where the quantized model switches between
        floating point and quantized tensors.
        """
        quantized = self.quant(x)
        activated = self.relu(self.conv(quantized))
        return self.dequant(activated)
    
# Create the float model and switch it to evaluation mode in one step
# (eval() hands back the module it was called on).
model_fp32 = M().eval()

# Show the module structure followed by its parameters.
print(model_fp32, model_fp32.state_dict(), sep="\n")

M(
  (quant): QuantStub()
  (conv): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
  (relu): ReLU()
  (dequant): DeQuantStub()
)
OrderedDict([('conv.weight', tensor([[[[-0.2928,  0.0156,  0.2505],
          [ 0.1452,  0.1279,  0.3321],
          [ 0.0904,  0.2869,  0.1864]]]])), ('conv.bias', tensor([-0.0878]))])


In [45]:
# Attach the default quantization config for the 'x86' backend.
model_fp32.qconfig = torch.ao.quantization.get_default_qconfig('x86')

# Fuse activations into the layers that precede them. The list has to be
# written by hand for each architecture; typical candidates are
# `conv + relu` and `conv + batchnorm + relu`.
model_fp32_fused = torch.ao.quantization.fuse_modules(
    model_fp32,
    modules_to_fuse=[['conv', 'relu']],
)

# Prepare for static quantization: observers are inserted so that activation
# tensors can be observed during calibration.
model_fp32_prepared = torch.ao.quantization.prepare(model_fp32_fused)

# Calibration pass that determines the activation quantization parameters.
# Random data is used here; a real setting would feed a representative dataset.
input_fp32 = torch.randn(4, 1, 4, 4)
model_fp32_prepared(input_fp32)

# Turn the observed model into a quantized one: weights are quantized, the
# scale and bias value used with each activation tensor are computed and
# stored, and key operators are swapped for quantized implementations.
model_int8 = torch.ao.quantization.convert(model_fp32_prepared)

# Left disabled: running the model, where the relevant calculations happen in int8.
# res = model_int8(input_fp32)
# print(res)

# Print the quantized model followed by its state_dict.
print(model_int8, model_int8.state_dict(), sep="\n")

M(
  (quant): Quantize(scale=tensor([0.0430]), zero_point=tensor([63]), dtype=torch.quint8)
  (conv): QuantizedConvReLU2d(1, 1, kernel_size=(3, 3), stride=(1, 1), scale=0.008406509645283222, zero_point=0)
  (relu): Identity()
  (dequant): DeQuantize()
)
OrderedDict([('quant.scale', tensor([0.0430])), ('quant.zero_point', tensor([63])), ('conv.weight', tensor([[[[-0.2917,  0.0156,  0.2501],
          [ 0.1459,  0.1276,  0.3308],
          [ 0.0912,  0.2865,  0.1875]]]], size=(1, 1, 3, 3), dtype=torch.qint8,
       quantization_scheme=torch.per_channel_affine,
       scale=tensor([0.0026], dtype=torch.float64), zero_point=tensor([0]),
       axis=0)), ('conv.bias', Parameter containing:
tensor([-0.0878], requires_grad=True)), ('conv.scale', tensor(0.0084)), ('conv.zero_point', tensor(0))])


quant, conv, relu, dequant 레이어 모두 유형이 바뀜 (relu는 conv에 융합되어 Identity가 됨). quantization parameter(scale, zero_point)는 quant와 conv 레이어에 추가되었고, conv.weight는 qint8로 양자화됨.