# Fully Connected

In [2]:
import torch as th

# Select the kernel backend used for quantized ops in this notebook.
th.backends.quantized.engine = "qnnpack"  # for ARM CPU
# Seed torch's global RNG so the th.rand(...) tensors created in later cells are
# repeatable. As the cell's last expression, this also echoes the Generator object.
th.manual_seed(0)

<torch._C.Generator at 0x110f8dad0>

In [15]:
class Model(th.nn.Module):
    """Minimal fully connected network used to inspect eager-mode quantization.

    Layout: ``QuantStub -> Linear(3 -> 2, no bias)``. A ``DeQuantStub`` is
    registered as ``self.dequant`` but is not applied in ``forward``, so the
    value returned by ``forward`` is whatever ``self.fc`` produces.
    """

    def __init__(self):
        super().__init__()
        quantization = th.ao.quantization
        self.quant = quantization.QuantStub()
        self.fc = th.nn.Linear(in_features=3, out_features=2, bias=False)
        # Kept as an attribute for experimentation; forward() does not call it.
        self.dequant = quantization.DeQuantStub()

    def forward(self, x):
        """Pass ``x`` through the quant stub and the linear layer, in that order."""
        return self.fc(self.quant(x))

In [16]:
# Float model that is prepared, fed one batch, and then converted below.
m = Model()

# Activations and weights are observed with identical settings:
# signed 8-bit range, per-tensor symmetric scheme, full range (no reduce_range).
int8_observer_kwargs = dict(
    quant_min=-128,
    quant_max=127,
    dtype=th.qint8,
    qscheme=th.per_tensor_symmetric,
    reduce_range=False,
)
observer_cls = th.ao.quantization.MovingAverageMinMaxObserver

m.qconfig = th.ao.quantization.QConfig(
    activation=observer_cls.with_args(**int8_observer_kwargs),
    weight=observer_cls.with_args(**int8_observer_kwargs),
)

# Prepare
pm = th.ao.quantization.prepare_qat(m)

# Train (a single forward pass on a random 32x3 batch stands in for a training loop)
pm(th.rand(32, 3))

# Convert (the prepared model is switched to eval mode first)
pm.eval()
qm = th.ao.quantization.convert(pm)

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  pm = th.ao.quantization.prepare_qat(m)
For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (

In [22]:
# Test: run one random 1x3 sample through the converted model and show the
# raw integer values behind each quantized tensor.
x = th.rand(1, 3)
xq = qm.quant(x)  # input after the model's quant stub only
yq = qm(x)        # result of the full forward pass

(
    x,                           # float input
    xq.int_repr(),               # integer representation of the quantized input
    yq.int_repr(),               # integer representation of the model output
    qm.fc.weight().int_repr(),   # integer representation of the fc weights
    qm.fc.bias(),                # bias as reported by the converted fc layer
)

(tensor([[0.6651, 0.8751, 0.3390]]),
 tensor([[ 85, 112,  43]], dtype=torch.int8),
 tensor([[ 16, -78]], dtype=torch.int8),
 tensor([[  52,    8,  -65],
         [  63, -127,  -79]], dtype=torch.int8),
 tensor([0., 0.]))