In [8]:
import torch
x = torch.rand(2,3, dtype=torch.float32)
print(x)

tensor([[0.9435, 0.8621, 0.4736],
        [0.8787, 0.8310, 0.1041]])


In [9]:
xq = torch.quantize_per_tensor(x, scale = 0.5, zero_point = 128, dtype=torch.quint8)
# 这里打印的是张量的反量化表示
print(xq)

tensor([[1.0000, 1.0000, 0.5000],
        [1.0000, 1.0000, 0.0000]], size=(2, 3), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.5, zero_point=128)


In [10]:
# 这个表示张量实际存储的整数表示
print(xq.int_repr())

tensor([[130, 130, 129],
        [130, 130, 128]], dtype=torch.uint8)


In [11]:
xdq = xq.dequantize()
print(xdq)

tensor([[1.0000, 1.0000, 0.5000],
        [1.0000, 1.0000, 0.0000]])


In [12]:
from torch import nn

class CivilNet(nn.Module):
    def __init__(self):
        super(CivilNet, self).__init__()
        gemfieldin = 1
        gemfieldout = 1
        self.conv = nn.Conv2d(gemfieldin, gemfieldout, kernel_size=1, stride=1, padding=0, groups=1, bias=False)
        self.fc = nn.Linear(3, 2,bias=False)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        x = self.relu(x)
        return x
civilnet = CivilNet()
print(civilnet)

CivilNet(
  (conv): Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (fc): Linear(in_features=3, out_features=2, bias=False)
  (relu): ReLU()
)


In [13]:
# 不是原地操作
# 这里使用定义在nnqd.Linear的from_float()方法，通过new_mod = mapping[type(mod)].from_float(mod)对新模型进行实例化
q_civilnet = torch.quantization.quantize_dynamic(civilnet)
print(civilnet)
print(q_civilnet)
print(q_civilnet.state_dict()['fc._packed_params._packed_params'])

CivilNet(
  (conv): Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (fc): Linear(in_features=3, out_features=2, bias=False)
  (relu): ReLU()
)
CivilNet(
  (conv): Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (fc): DynamicQuantizedLinear(in_features=3, out_features=2, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (relu): ReLU()
)
(tensor([[ 0.4815,  0.2541, -0.3433],
        [-0.3076,  0.5217,  0.5662]], size=(2, 3), dtype=torch.qint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.00445860531181097,
       zero_point=0), None)


In [14]:
# fuse_model,原地操作
torch.quantization.fuse_modules(civilnet,[['fc','relu']],inplace=True)
print(civilnet)

CivilNet(
  (conv): Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (fc): LinearReLU(
    (0): Linear(in_features=3, out_features=2, bias=False)
    (1): ReLU()
  )
  (relu): Identity()
)


In [15]:
# 设置qconfig
civilnet.qconfig = torch.quantization.get_default_qconfig('fbgemm')
print(civilnet)

CivilNet(
  (conv): Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (fc): LinearReLU(
    (0): Linear(in_features=3, out_features=2, bias=False)
    (1): ReLU()
  )
  (relu): Identity()
)


In [16]:
# 设置prepare
civilnet_prepared = torch.quantization.prepare(civilnet)
print(civilnet_prepared)

CivilNet(
  (conv): Conv2d(
    1, 1, kernel_size=(1, 1), stride=(1, 1), bias=False
    (activation_post_process): HistogramObserver()
  )
  (fc): LinearReLU(
    (0): Linear(in_features=3, out_features=2, bias=False)
    (1): ReLU()
    (activation_post_process): HistogramObserver()
  )
  (relu): Identity()
)




In [18]:
civilnet_prepared_int8 = torch.quantization.convert(civilnet_prepared)
print(civilnet_prepared_int8)

CivilNet(
  (conv): QuantizedConv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0, bias=False)
  (fc): QuantizedLinearReLU(in_features=3, out_features=2, scale=1.0, zero_point=0, qscheme=torch.per_channel_affine)
  (relu): Identity()
)




In [None]:
# QAT
cnet = CivilNet()
cnet.train()
cnet.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')