# Fully Connected

In [39]:
import torch as th
import warnings

warnings.filterwarnings("ignore")
th.backends.quantized.engine = "qnnpack"  # for ARM CPU
th.manual_seed(0)

<torch._C.Generator at 0x110f8dad0>

In [40]:
class Model(th.nn.Module):
    def __init__(self):
        super().__init__()
        self.quant = th.ao.quantization.QuantStub()
        self.fc = th.nn.Linear(3, 2, bias=False)
        self.dequant = th.ao.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.fc(x)
        x = self.dequant(x)
        return x

In [41]:
m = Model()

m.qconfig = th.ao.quantization.QConfig(
    activation=th.ao.quantization.MovingAverageMinMaxObserver.with_args(
        quant_min=-128,
        quant_max=127,
        dtype=th.qint8,
        qscheme=th.per_tensor_symmetric,
        reduce_range=False,
    ),
    weight=th.ao.quantization.MovingAverageMinMaxObserver.with_args(
        quant_min=-128,
        quant_max=127,
        dtype=th.qint8,
        qscheme=th.per_tensor_symmetric,
        reduce_range=False,
    ),
)

# Prepare
pm = th.ao.quantization.prepare_qat(m)

# Train
pm(th.rand(32, 3))

# Convert
qm = th.ao.quantization.convert(pm.eval())

In [42]:
# Test
x = th.rand(1, 3)
xq = qm.quant(x)
y = qm(x)
yq = th.round(y / qm.quant.scale)

print(f"Float input: {x}\n")
print(f"Quantized input: {xq}\n")
print(f"Float output: {y}\n")
print(f"Quantized output: {yq}\n")
print(f"Quantized weights: {qm.fc.weight().int_repr()}\n")
print(f"Quantized bias: {qm.fc.bias()}\n")

Float input: tensor([[0.4725, 0.5751, 0.2952]])

Quantized input: tensor([[0.4692, 0.5787, 0.2972]], size=(1, 3), dtype=torch.qint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.007820130325853825,
       zero_point=0)

Float output: tensor([[ 0.0374, -0.2808]])

Quantized output: tensor([[  5., -36.]])

Quantized weights: tensor([[  -1,   83, -127],
        [-114,  -60,   42]], dtype=torch.int8)

Quantized bias: tensor([0., 0.])

