In [1]:
import numpy as np
import torch
import torch.nn as nn
import sys
import torch.quantization
torch.set_printoptions(precision=30)
np.set_printoptions(precision=30)

In [2]:
class M_quant_fullweight(nn.Module):

    def __init__(self):
            super(M_quant_fullweight, self).__init__()
            # QuantStub converts tensors from floating point to quantized
            self.quant = torch.quantization.QuantStub()
            # self.BN = nn.BatchNorm2d(3)
            # self.conv = torch.nn.Conv2d(1, 1, 1)
            # self.conv = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=3, stride=1, padding=1)
            self.conv = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1)
            # self.conv.weight = torch.nn.Parameter(torch.tensor([[[[ 0.03307433053851127625, -0.13484150171279907227, -0.21625524759292602539], 
            #                                                         [ 0.14247404038906097412, -0.14247404038906097412, -0.24932956695556640625], 
            #                                                         [ 0.32311078906059265137, -0.14501821994781494141, -0.21371106803417205811]]]]))
            self.conv.weight = torch.nn.Parameter(torch.tensor([[[[ 0.0, 0.0, 0.0], 
                                                                    [ 0.0, 0.5, 0.0], 
                                                                    [ 0.0, 0.0, 0.0]]]]))
            self.conv.bias = torch.nn.Parameter(torch.tensor([0.0]))
            # self.relu = torch.nn.ReLU()
            # DeQuantStub converts tensors from quantized to floating point
            self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):

        x = self.quant(x)
        # x = self.BN(x)
        x = self.conv(x)
        # x = self.relu(x)      
        x = self.dequant(x)
         
        return x

In [2]:
def convolve(input_image: np.array, kernel: np.array, bias: np.array) -> np.array:    
    ### Add description of what dimension is what
    k = kernel.shape[0]
    number_input_row = input_image.shape[0]
    number_input_col = input_image.shape[1]
    input_depth = input_image.shape[2]   # R, G, B channel
    filter_depth = kernel.shape[2]       # filter depth
    filter_num = kernel.shape[3]         # number of filters
    bias_depth = bias.shape[0]

    if input_depth != filter_depth:
        print("Error: Number of channels in both image and filter depth must match.")
        sys.exit()

    if bias_depth != filter_num:
        print("Error: Bias depth and filter number must match.")
        sys.exit()
    
    if np.isnan(input_image.all()):
        print("Input image has NaN values")
        sys.exit()

    if np.isnan(kernel.all()):
        print("Kernel has NaN values")
        sys.exit()

    padded_image = np.zeros(shape=(number_input_row+int((k+1)/2), number_input_col+int((k+1)/2), input_depth))
    padded_image[((k-1)>>1):number_input_row + ((k-1)>>1), ((k-1)>>1):number_input_col + ((k-1)>>1), :] = input_image
    convolved_img = np.zeros(shape=(number_input_row, number_input_col, filter_num))

    for f in range(filter_num):
        for i in range(number_input_row):
            for j in range(number_input_col):
                mat = padded_image[i:i+k, j:j+k, :]
                convolved_img[i, j, f ] = np.sum(np.multiply(mat, kernel[:,:,:,f]))
        
        convolved_img[:, :, f ] += bias[f]


    if np.isnan(convolved_img.all()):
        print("convolved Image has NaN values")
        sys.exit()

    return convolved_img

In [3]:
input_fp32 = torch.tensor([[[[ 0.95466703176498413086, -0.136212718486785888672,
            0.75253891944885253906,  1.57104063034057617188],
          [ 0.97250884771347045898, -0.67004448175430297852,
           -0.58047348260879516602,  1.30683445930480957031],
          [-0.13423979282379150391,  0.16391958296298980713,
           -0.71688455343246459961,  0.05846109613776206970],
          [ 1.07569837570190429688, -0.06351475417613983154,
           -0.19469638168811798096, -0.09430617839097976685]]]], requires_grad=False)

In [6]:
input_fp32_debug = torch.tensor([[[[ 0.95466703176498413086, 0.0, 0.0,  0.0],
          [ 0.0, 0.0, 0.0,  0.0],
          [ 0.0, 0.0, 0.0,  0.0],
          [ 0.0, 0.0, 0.0,  0.0]]]], requires_grad=False)

In [6]:
model_quant = M_quant_fullweight()
model_quant.eval()
model_quant.qconfig = torch.quantization.get_default_qconfig('fbgemm')
# model_fp32_fused = torch.quantization.fuse_modules(
#     model_fp32, [['conv', 'relu']])
# model_fp32_fused = torch.quantization.fuse_modules(
#     model_fp32, ['conv'])
model_quant_fullweight = torch.quantization.prepare(model_quant)
model_quant_fullweight(input_fp32)   ### Passing the input through model before conversion for calibration
model_fp32_converted_fullweight = torch.quantization.convert(model_quant_fullweight, inplace=True)

  src_bin_begin // dst_bin_width, 0, self.dst_nbins - 1
  src_bin_end // dst_bin_width, 0, self.dst_nbins - 1


In [7]:
output_quant = model_fp32_converted_fullweight(input_fp32)

In [8]:
output_quant

tensor([[[[ 0.477168589830398559570312500000, -0.072025448083877563476562500000,
            0.369130432605743408203125000000,  0.783276736736297607421875000000],
          [ 0.486171782016754150390625000000, -0.288101792335510253906250000000,
           -0.288101792335510253906250000000,  0.648229002952575683593750000000],
          [-0.072025448083877563476562500000,  0.081028625369071960449218750000,
           -0.288101792335510253906250000000,  0.027009543031454086303710937500],
          [ 0.531187653541564941406250000000, -0.036012724041938781738281250000,
           -0.090031810104846954345703125000, -0.054019086062908172607421875000]]]])

## Quantization Paramters

In [9]:
model_fp32_converted_fullweight.quant

Quantize(scale=tensor([0.016968380659818649291992187500]), zero_point=tensor([34]), dtype=torch.quint8)

In [10]:
model_fp32_converted_fullweight.conv

QuantizedConv2d(1, 1, kernel_size=(3, 3), stride=(1, 1), scale=0.009003181010484695, zero_point=40, padding=(1, 1))

In [None]:
tensor([[[[0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.498039245605468750000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000]]]])

In [56]:
model_fp32_converted_fullweight.conv._weight_bias()[0].shape

torch.Size([1, 1, 3, 3])

In [11]:
# Quant Block Output
model_fp32_converted_fullweight.quant(input_fp32).int_repr()

tensor([[[[ 90,  26,  78, 127],
          [ 91,   0,   0, 111],
          [ 26,  44,   0,  37],
          [ 97,  30,  23,  28]]]], dtype=torch.uint8)

In [38]:
model_fp32_converted_fullweight.conv(model_fp32_converted_fullweight.quant(input_fp32))

tensor([[[[ 0.477168589830398559570312500000, -0.072025448083877563476562500000,
            0.369130432605743408203125000000,  0.783276736736297607421875000000],
          [ 0.486171782016754150390625000000, -0.288101792335510253906250000000,
           -0.288101792335510253906250000000,  0.648229002952575683593750000000],
          [-0.072025448083877563476562500000,  0.081028625369071960449218750000,
           -0.288101792335510253906250000000,  0.027009543031454086303710937500],
          [ 0.531187653541564941406250000000, -0.036012724041938781738281250000,
           -0.090031810104846954345703125000, -0.054019086062908172607421875000]]]],
       size=(1, 1, 4, 4), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.009003181010484695,
       zero_point=40)

In [12]:
# Conv Block Output
model_fp32_converted_fullweight.conv(model_fp32_converted_fullweight.quant(input_fp32)).int_repr()

tensor([[[[ 93,  32,  81, 127],
          [ 94,   8,   8, 112],
          [ 32,  49,   8,  43],
          [ 99,  36,  30,  34]]]], dtype=torch.uint8)

In [13]:
model_fp32_converted_fullweight.quant(input_fp32_debug).int_repr()

tensor([[[[90, 34, 34, 34],
          [34, 34, 34, 34],
          [34, 34, 34, 34],
          [34, 34, 34, 34]]]], dtype=torch.uint8)

In [14]:
model_fp32_converted_fullweight.quant(input_fp32_debug)

tensor([[[[0.950229287147521972656250000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000]]]],
       size=(1, 1, 4, 4), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.01696838065981865,
       zero_point=34)

In [15]:
model_fp32_converted_fullweight.conv(model_fp32_converted_fullweight.quant(input_fp32_debug))

tensor([[[[0.477168589830398559570312500000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000]]]],
       size=(1, 1, 4, 4), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.009003181010484695,
       zero_point=40)

In [18]:
# Conv Block Output
model_fp32_converted_fullweight.conv(model_fp32_converted_fullweight.quant(input_fp32_debug)).int_repr()

tensor([[[[93, 40, 40, 40],
          [40, 40, 40, 40],
          [40, 40, 40, 40],
          [40, 40, 40, 40]]]], dtype=torch.uint8)

##Weight Verification

In [83]:
model_fp32_converted_fullweight.conv._weight_bias()[0].int_repr()

tensor([[[[  0,   0,   0],
          [  0, 127,   0],
          [  0,   0,   0]]]], dtype=torch.int8)

In [33]:
# Quantized weights from pytroch module
model_fp32_converted_fullweight.conv._weight_bias()[0]

tensor([[[[0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.498039245605468750000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000]]]], size=(1, 1, 3, 3),
       dtype=torch.qint8, quantization_scheme=torch.per_channel_affine,
       scale=tensor([0.003921568859368562698364257812], dtype=torch.float64),
       zero_point=tensor([0]), axis=0)

In [58]:
## Convolution weight block zero point
model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_zero_points()

tensor([0])

In [57]:
## Convolution weight block scale
model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_scales()

tensor([0.003921568859368562698364257812], dtype=torch.float64)

In [21]:
##Weight before quantization
model_quant.conv.weight.detach()

tensor([[[[0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.500000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000]]]])

In [59]:
# Manually quantizing weight
torch.quantize_per_tensor(model_quant.conv.weight.detach(), scale = model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_scales(), zero_point = model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_zero_points(), dtype=torch.quint8)

tensor([[[[0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.498039245605468750000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000]]]], size=(1, 1, 3, 3),
       dtype=torch.quint8, quantization_scheme=torch.per_tensor_affine,
       scale=0.003921568859368563, zero_point=0)

In [71]:
quant_conv_weight = (torch.round(model_quant.conv.weight.detach()/model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_scales()) + model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_zero_points()).type(dtype=torch.FloatTensor)

In [72]:
quant_conv_weight

tensor([[[[  0.,   0.,   0.],
          [  0., 127.,   0.],
          [  0.,   0.,   0.]]]])

In [75]:
quant_conv_weight = (quant_conv_weight - model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_zero_points())*model_fp32_converted_fullweight.conv._weight_bias()[0].q_per_channel_scales().type(dtype=torch.FloatTensor)

In [76]:
quant_conv_weight

tensor([[[[0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.001953095194483694735029799538,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000]]]], dtype=torch.float64)

##Weights matched verified

##Block Output verification

In [23]:
quant_conv_weight = torch.tensor([[[[0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.49803924560546875000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000]]]])

In [18]:
input_quant_manual_after_quant = torch.clamp(torch.round(input_fp32.detach()/model_fp32_converted_fullweight.quant.scale)+model_fp32_converted_fullweight.quant.zero_point, min=0)

In [19]:
input_quant_manual_after_quant
#matching with pytorch quant block output


tensor([[[[ 90.,  26.,  78., 127.],
          [ 91.,   0.,   0., 111.],
          [ 26.,  44.,   0.,  37.],
          [ 97.,  30.,  23.,  28.]]]])

In [20]:
input_quant_manual_after_quant_dequant = (input_quant_manual_after_quant - model_fp32_converted_fullweight.quant.zero_point)*model_fp32_converted_fullweight.quant.scale

In [21]:
input_quant_manual_after_quant_dequant

tensor([[[[ 0.950229287147521972656250000000, -0.135747045278549194335937500000,
            0.746608734130859375000000000000,  1.578059434890747070312500000000],
          [ 0.967197716236114501953125000000, -0.576924920082092285156250000000,
           -0.576924920082092285156250000000,  1.306565284729003906250000000000],
          [-0.135747045278549194335937500000,  0.169683814048767089843750000000,
           -0.576924920082092285156250000000,  0.050905141979455947875976562500],
          [ 1.069007992744445800781250000000, -0.067873522639274597167968750000,
           -0.186652183532714843750000000000, -0.101810283958911895751953125000]]]])

In [24]:
convolve_manual_float = convolve(np.reshape(input_quant_manual_after_quant_dequant.detach().numpy()[0][0].astype(np.float32), (4, 4, 1)), np.reshape(quant_conv_weight.detach().numpy()[0][0].astype(np.float32), (3, 3, 1, 1)), np.array([0]))

In [25]:
convolve_manual_float

array([[[ 0.4732514773231742  ],
        [-0.06760735602370005 ],
        [ 0.3718404507089872  ],
        [ 0.78593553047358    ]],

       [[ 0.4817024209455667  ],
        [-0.2873312519686806  ],
        [-0.2873312519686806  ],
        [ 0.6507207887407276  ]],

       [[-0.06760735602370005 ],
        [ 0.0845091987403066  ],
        [-0.2873312519686806  ],
        [ 0.02535275850888752 ]],

       [[ 0.5324079342526602  ],
        [-0.033803678011850025],
        [-0.0929601126772468  ],
        [-0.05070551701777504 ]]])

In [27]:
output_manual_integer = np.round(convolve_manual_float/model_fp32_converted_fullweight.conv.scale) + model_fp32_converted_fullweight.conv.zero_point

In [28]:
output_manual_integer
# matching with that of pytorch block

array([[[ 93.],
        [ 32.],
        [ 81.],
        [127.]],

       [[ 94.],
        [  8.],
        [  8.],
        [112.]],

       [[ 32.],
        [ 49.],
        [  8.],
        [ 43.]],

       [[ 99.],
        [ 36.],
        [ 30.],
        [ 34.]]])

In [63]:
output_manual = (torch.tensor(output_manual_integer, dtype=torch.float32) - model_fp32_converted_fullweight.conv.zero_point)*torch.tensor(model_fp32_converted_fullweight.conv.scale, dtype=torch.float32)

In [30]:
output_manual

array([[[ 0.47716859355568886 ],
        [-0.07202544808387756 ],
        [ 0.3691304214298725  ],
        [ 0.7832767479121685  ]],

       [[ 0.48617177456617355 ],
        [-0.28810179233551025 ],
        [-0.28810179233551025 ],
        [ 0.6482290327548981  ]],

       [[-0.07202544808387756 ],
        [ 0.08102862909436226 ],
        [-0.28810179233551025 ],
        [ 0.027009543031454086]],

       [[ 0.531187679618597   ],
        [-0.03601272404193878 ],
        [-0.09003181010484695 ],
        [-0.05401908606290817 ]]])

In [64]:
np.reshape(output_manual, (1, 4, 4)) - output_quant.detach().numpy()

tensor([[[[0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.]]]])

##Results matching.

In [32]:
input_fp32_debug_quant =  torch.tensor([[[[0.950229287147521972656250000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000]]]])

In [34]:
convolve_manual_debug = convolve(np.reshape(input_fp32_debug_quant.detach().numpy()[0][0].astype(np.float32), (4, 4, 1)), np.reshape(quant_conv_weight.detach().numpy()[0][0].astype(np.float32), (3, 3, 1, 1)), np.array([0]))

In [35]:
convolve_manual_debug

array([[[0.4732514773231742],
        [0.                ],
        [0.                ],
        [0.                ]],

       [[0.                ],
        [0.                ],
        [0.                ],
        [0.                ]],

       [[0.                ],
        [0.                ],
        [0.                ],
        [0.                ]],

       [[0.                ],
        [0.                ],
        [0.                ],
        [0.                ]]])

In [40]:
np.round(convolve_manual_debug/model_fp32_converted_fullweight.conv.scale) + model_fp32_converted_fullweight.conv.zero_point


array([[[93.],
        [40.],
        [40.],
        [40.]],

       [[40.],
        [40.],
        [40.],
        [40.]],

       [[40.],
        [40.],
        [40.],
        [40.]],

       [[40.],
        [40.],
        [40.],
        [40.]]])

In [134]:
model_fp32_converted_fullweight(input_fp32_debug)

tensor([[[[0.477168589830398559570312500000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000]]]])

In [40]:
model_fp32_converted_fullweight.conv(model_fp32_converted_fullweight.quant(input_fp32_debug))

tensor([[[[0.477168589830398559570312500000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000, 0.000000000000000000000000000000]]]],
       size=(1, 1, 4, 4), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.009003181010484695,
       zero_point=40)

In [1]:
model_fp32_converted_fullweight.quant(input_fp32_debug)

NameError: name 'model_fp32_converted_fullweight' is not defined

In [130]:
convolve(np.reshape(input_quant_manual_after_quant_dequant.detach().numpy()[0][0].astype(np.float32), (4, 4, 1)), np.reshape(quant_conv_weight.detach().numpy()[0][0].astype(np.float32), (3, 3, 1, 1)), np.array([0]))

array([[[ 0.4732514773231742  ],
        [-0.06760735602370005 ],
        [ 0.3718404507089872  ],
        [ 0.78593553047358    ]],

       [[ 0.4817024209455667  ],
        [-0.2873312519686806  ],
        [-0.2873312519686806  ],
        [ 0.6507207887407276  ]],

       [[-0.06760735602370005 ],
        [ 0.0845091987403066  ],
        [-0.2873312519686806  ],
        [ 0.02535275850888752 ]],

       [[ 0.5324079342526602  ],
        [-0.033803678011850025],
        [-0.0929601126772468  ],
        [-0.05070551701777504 ]]])

In [115]:
input_quant_manual_after_quant_dequant.detach().numpy()[0][0]


array([[ 0.9502293  , -0.13574705 ,  0.74660873 ,  1.5780594  ],
       [ 0.9671977  , -0.5769249  , -0.5769249  ,  1.3065653  ],
       [-0.13574705 ,  0.16968381 , -0.5769249  ,  0.050905142],
       [ 1.069008   , -0.06787352 , -0.18665218 , -0.101810284]],
      dtype=float32)

In [125]:
quant_conv_weight = torch.tensor([[[[0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.49803924560546875000,
           0.000000000000000000000000000000],
          [0.000000000000000000000000000000, 0.000000000000000000000000000000,
           0.000000000000000000000000000000]]]])

In [None]:
model_fp32_converted_fullweight.conv._weight_bias()[0].int_repr()

In [26]:
convolve_manual = convolve(np.reshape(input_quant_manual_after_quant.detach().numpy(), (4, 4, 1)), np.reshape(model_fp32_converted_fullweight.conv._weight_bias()[0].int_repr().detach().numpy()[0][0], (3, 3, 1, 1)), np.array([0]))

In [27]:
convolve_manual

array([[[11430.],
        [ 3302.],
        [ 9906.],
        [16129.]],

       [[11557.],
        [    0.],
        [    0.],
        [14097.]],

       [[ 3302.],
        [ 5588.],
        [    0.],
        [ 4699.]],

       [[12319.],
        [ 3810.],
        [ 2921.],
        [ 3556.]]])

In [4]:
class M_quant_fullweight_bias(nn.Module):

    def __init__(self):
            super(M_quant_fullweight_bias, self).__init__()
            # QuantStub converts tensors from floating point to quantized
            self.quant = torch.quantization.QuantStub()
            # self.BN = nn.BatchNorm2d(3)
            # self.conv = torch.nn.Conv2d(1, 1, 1)
            # self.conv = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=3, stride=1, padding=1)
            self.conv = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1)
            self.conv.weight = torch.nn.Parameter(torch.tensor([[[[ 0.03307433053851127625, -0.13484150171279907227, -0.21625524759292602539], 
                                                                    [ 0.14247404038906097412, -0.14247404038906097412, -0.24932956695556640625], 
                                                                    [ 0.32311078906059265137, -0.14501821994781494141, -0.21371106803417205811]]]]))
            # self.conv.weight = torch.nn.Parameter(torch.tensor([[[[ 0.0, 0.0, 0.0], 
            #                                                         [ 0.0, 0.5, 0.0], 
            #                                                         [ 0.0, 0.0, 0.0]]]]))
            # self.conv.bias = torch.nn.Parameter(torch.tensor([0.0]))
            self.conv.bias = torch.nn.Parameter(torch.tensor([2.12300]))
            # self.conv.bias = torch.nn.Parameter(torch.tensor([2.11232137680053710936300]))
            # 2.11232137680053710937500
            # self.relu = torch.nn.ReLU()
            # DeQuantStub converts tensors from quantized to floating point
            self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):

        x = self.quant(x)
        # x = self.BN(x)
        x = self.conv(x)
        # x = self.relu(x)      
        x = self.dequant(x)
         
        return x

In [5]:
model_quant_bias = M_quant_fullweight_bias()
model_quant_bias.eval()
model_quant_bias.qconfig = torch.quantization.get_default_qconfig('fbgemm')
# model_fp32_fused = torch.quantization.fuse_modules(
#     model_fp32, [['conv', 'relu']])
# model_fp32_fused = torch.quantization.fuse_modules(
#     model_fp32, ['conv'])
model_quant_fullweight_bias = torch.quantization.prepare(model_quant_bias)
model_quant_fullweight_bias(input_fp32)   ### Passing the input through model before conversion for calibration
model_fp32_converted_fullweight_bias = torch.quantization.convert(model_quant_fullweight_bias, inplace=True)

  src_bin_begin // dst_bin_width, 0, self.dst_nbins - 1
  src_bin_end // dst_bin_width, 0, self.dst_nbins - 1


##Quantization Parameters

In [6]:
model_fp32_converted_fullweight_bias.quant

Quantize(scale=tensor([0.016968380659818649291992187500]), zero_point=tensor([34]), dtype=torch.quint8)

In [7]:
model_fp32_converted_fullweight_bias.conv

QuantizedConv2d(1, 1, kernel_size=(3, 3), stride=(1, 1), scale=0.02287949062883854, zero_point=0, padding=(1, 1))

In [8]:
model_fp32_converted_fullweight_bias.conv._weight_bias()

(tensor([[[[ 0.032944630831480026245117187500, -0.134312719106674194335937500000,
            -0.215407192707061767578125000000],
           [ 0.141915321350097656250000000000, -0.141915321350097656250000000000,
            -0.248351827263832092285156250000],
           [ 0.321843683719635009765625000000, -0.144449532032012939453125000000,
            -0.212872996926307678222656250000]]]], size=(1, 1, 3, 3),
        dtype=torch.qint8, quantization_scheme=torch.per_channel_affine,
        scale=tensor([0.002534202300012111663818359375], dtype=torch.float64),
        zero_point=tensor([0]), axis=0),
 Parameter containing:
 tensor([2.122999906539916992187500000000], requires_grad=True))

In [9]:
model_fp32_converted_fullweight_bias.conv._weight_bias()[0].int_repr()

tensor([[[[ 13, -53, -85],
          [ 56, -56, -98],
          [127, -57, -84]]]], dtype=torch.int8)

In [10]:
model_fp32_converted_fullweight_bias.quant(input_fp32)

tensor([[[[ 0.950229287147521972656250000000, -0.135747045278549194335937500000,
            0.746608734130859375000000000000,  1.578059434890747070312500000000],
          [ 0.967197716236114501953125000000, -0.576924920082092285156250000000,
           -0.576924920082092285156250000000,  1.306565284729003906250000000000],
          [-0.135747045278549194335937500000,  0.169683814048767089843750000000,
           -0.576924920082092285156250000000,  0.050905141979455947875976562500],
          [ 1.069007992744445800781250000000, -0.067873522639274597167968750000,
           -0.186652183532714843750000000000, -0.101810283958911895751953125000]]]],
       size=(1, 1, 4, 4), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.01696838065981865,
       zero_point=34)

In [11]:
model_fp32_converted_fullweight_bias.quant(input_fp32).int_repr()

tensor([[[[ 90,  26,  78, 127],
          [ 91,   0,   0, 111],
          [ 26,  44,   0,  37],
          [ 97,  30,  23,  28]]]], dtype=torch.uint8)

In [12]:
model_fp32_converted_fullweight_bias.conv(model_fp32_converted_fullweight_bias.quant(input_fp32))

tensor([[[[2.013395071029663085937500000000, 2.608261823654174804687500000000,
           1.235492467880249023437500000000, 1.624443888664245605468750000000],
          [2.013395071029663085937500000000, 2.425225973129272460937500000000,
           1.487166881561279296875000000000, 1.464287400245666503906250000000],
          [1.944756746292114257812500000000, 2.859936237335205078125000000000,
           2.013395071029663085937500000000, 1.784600257873535156250000000000],
          [1.967636227607727050781250000000, 2.425225973129272460937500000000,
           2.242190122604370117187500000000, 2.082033634185791015625000000000]]]],
       size=(1, 1, 4, 4), dtype=torch.quint8,
       quantization_scheme=torch.per_tensor_affine, scale=0.02287949062883854,
       zero_point=0)

In [13]:
model_fp32_converted_fullweight_bias.conv(model_fp32_converted_fullweight_bias.quant(input_fp32)).int_repr()

tensor([[[[ 88, 114,  54,  71],
          [ 88, 106,  65,  64],
          [ 85, 125,  88,  78],
          [ 86, 106,  98,  91]]]], dtype=torch.uint8)

In [14]:
model_quant_bias.conv.bias

Parameter containing:
tensor([2.122999906539916992187500000000], requires_grad=True)

In [35]:
# quant_conv_bias = (torch.round(model_quant_bias.conv.bias/(model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_scales()*model_fp32_converted_fullweight_bias.conv.scale))- model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_zero_points()).type(dtype=torch.FloatTensor)

In [36]:
# quant_conv_bias

tensor([23639.], grad_fn=<ToCopyBackward0>)

In [37]:
# model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_scales().type(dtype=torch.FloatTensor)

tensor([0.003921568859368562698364257812])

In [15]:
# ((quant_conv_bias - model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_zero_points())*model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_scales().type(dtype=torch.FloatTensor)*model_fp32_converted_fullweight_bias.conv.scale)

tensor([2.112329006195068359375000000000], grad_fn=<MulBackward0>)

In [15]:
model_fp32_converted_fullweight_bias.conv._weight_bias()[1]

Parameter containing:
tensor([2.122999906539916992187500000000], requires_grad=True)

##Weight Verification same as the non bias case

In [16]:
quant_conv_weight = (torch.round(model_quant_bias.conv.weight.detach()/model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_scales()) + model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_zero_points()).type(dtype=torch.FloatTensor)
quant_conv_weight = (quant_conv_weight - model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_zero_points())*model_fp32_converted_fullweight_bias.conv._weight_bias()[0].q_per_channel_scales().type(dtype=torch.FloatTensor)

In [17]:
input_quant_manual_after_quant = torch.clamp(torch.round(input_fp32.detach()/model_fp32_converted_fullweight_bias.quant.scale)+model_fp32_converted_fullweight_bias.quant.zero_point, min=0)
input_quant_manual_after_quant_dequant = (input_quant_manual_after_quant - model_fp32_converted_fullweight_bias.quant.zero_point)*model_fp32_converted_fullweight_bias.quant.scale
# convolve_manual_float = convolve(np.reshape(input_quant_manual_after_quant_dequant.detach().numpy()[0][0].astype(np.float32), (input_fp32.shape[2], input_fp32.shape[3], input_fp32.shape[1])), np.reshape(quant_conv_weight.detach().numpy().astype(np.float32), (quant_conv_weight.shape[2], quant_conv_weight.shape[3], quant_conv_weight.shape[0], quant_conv_weight.shape[1])), np.array([0]))
convolve_manual_float = convolve(np.reshape(input_quant_manual_after_quant_dequant.detach().numpy()[0][0].astype(np.float32), (input_fp32.shape[2], input_fp32.shape[3], input_fp32.shape[1])), np.reshape(quant_conv_weight.detach().numpy().astype(np.float32), (quant_conv_weight.shape[2], quant_conv_weight.shape[3], quant_conv_weight.shape[0], quant_conv_weight.shape[1])), model_fp32_converted_fullweight_bias.conv._weight_bias()[1].detach().numpy())
output_manual_integer = np.round(convolve_manual_float/model_fp32_converted_fullweight_bias.conv.scale) + model_fp32_converted_fullweight_bias.conv.zero_point
output_manual = (torch.tensor(output_manual_integer, dtype=torch.float32) - model_fp32_converted_fullweight_bias.conv.zero_point)*torch.tensor(model_fp32_converted_fullweight_bias.conv.scale, dtype=torch.float32)

In [18]:
input_quant_manual_after_quant

tensor([[[[ 90.,  26.,  78., 127.],
          [ 91.,   0.,   0., 111.],
          [ 26.,  44.,   0.,  37.],
          [ 97.,  30.,  23.,  28.]]]])

In [19]:
input_quant_manual_after_quant_dequant

tensor([[[[ 0.950229287147521972656250000000, -0.135747045278549194335937500000,
            0.746608734130859375000000000000,  1.578059434890747070312500000000],
          [ 0.967197716236114501953125000000, -0.576924920082092285156250000000,
           -0.576924920082092285156250000000,  1.306565284729003906250000000000],
          [-0.135747045278549194335937500000,  0.169683814048767089843750000000,
           -0.576924920082092285156250000000,  0.050905141979455947875976562500],
          [ 1.069007992744445800781250000000, -0.067873522639274597167968750000,
           -0.186652183532714843750000000000, -0.101810283958911895751953125000]]]])

In [20]:
convolve_manual_float


array([[[2.004961317885303 ],
        [2.6091296907036874],
        [1.225390583748318 ],
        [1.6305919276975587]],

       [[2.014120595676534 ],
        [2.4287392012361857],
        [1.4809473607325894],
        [1.4753142189279387]],

       [[1.9545207649587837],
        [2.850152028546039 ],
        [2.020140772369282 ],
        [1.7940399084887053]],

       [[1.9698292478240498],
        [2.4277071696669   ],
        [2.237254380045981 ],
        [2.0851157545525902]]])

In [21]:
output_manual_integer

array([[[ 88.],
        [114.],
        [ 54.],
        [ 71.]],

       [[ 88.],
        [106.],
        [ 65.],
        [ 64.]],

       [[ 85.],
        [125.],
        [ 88.],
        [ 78.]],

       [[ 86.],
        [106.],
        [ 98.],
        [ 91.]]])

In [22]:
output_manual.reshape(1, 4, 4)

tensor([[[2.013395071029663085937500000000, 2.608261823654174804687500000000,
          1.235492467880249023437500000000, 1.624443888664245605468750000000],
         [2.013395071029663085937500000000, 2.425225973129272460937500000000,
          1.487166881561279296875000000000, 1.464287400245666503906250000000],
         [1.944756746292114257812500000000, 2.859936237335205078125000000000,
          2.013395071029663085937500000000, 1.784600257873535156250000000000],
         [1.967636227607727050781250000000, 2.425225973129272460937500000000,
          2.242190122604370117187500000000, 2.082033634185791015625000000000]]])

In [23]:
model_fp32_converted_fullweight_bias((input_fp32))[0]

tensor([[[2.013395071029663085937500000000, 2.608261823654174804687500000000,
          1.235492467880249023437500000000, 1.624443888664245605468750000000],
         [2.013395071029663085937500000000, 2.425225973129272460937500000000,
          1.487166881561279296875000000000, 1.464287400245666503906250000000],
         [1.944756746292114257812500000000, 2.859936237335205078125000000000,
          2.013395071029663085937500000000, 1.784600257873535156250000000000],
         [1.967636227607727050781250000000, 2.425225973129272460937500000000,
          2.242190122604370117187500000000, 2.082033634185791015625000000000]]])

In [24]:
output_manual.reshape(1, 4, 4) - model_fp32_converted_fullweight_bias((input_fp32))[0].detach().numpy()

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

###Results are matching

In [29]:
class M_quant_fullweight_bias2(nn.Module):

    def __init__(self):
            super(M_quant_fullweight_bias2, self).__init__()
            # QuantStub converts tensors from floating point to quantized
            self.quant = torch.quantization.QuantStub()
            # self.BN = nn.BatchNorm2d(3)
            # self.conv = torch.nn.Conv2d(1, 1, 1)
            # self.conv = torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=3, stride=1, padding=1)
            self.conv = torch.nn.Conv2d(in_channels=1, out_channels=2, kernel_size=3, stride=1, padding=1)
            # self.conv.bias = torch.nn.Parameter(torch.tensor([0.0]))
            # self.conv.bias = torch.nn.Parameter(torch.tensor([2.12300]))
            # self.relu = torch.nn.ReLU()
            # DeQuantStub converts tensors from quantized to floating point
            self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):

        # x = self.quant(x)
        # x = self.BN(x)
        x = self.conv(x)
        # x = self.relu(x)      
        # x = self.dequant(x)

        return x
         

In [30]:
model_quant_bias2 = M_quant_fullweight_bias2()
model_quant_bias2.eval()

M_quant_fullweight_bias2(
  (quant): QuantStub()
  (conv): Conv2d(1, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dequant): DeQuantStub()
)

In [31]:
model_quant_bias2.conv.weight

Parameter containing:
tensor([[[[ 0.172700405120849609375000000000,  0.035670280456542968750000000000,
           -0.009075443260371685028076171875],
          [ 0.219669222831726074218750000000, -0.273351132869720458984375000000,
            0.183603286743164062500000000000],
          [-0.041898608207702636718750000000,  0.143252611160278320312500000000,
            0.286942601203918457031250000000]]],


        [[[ 0.043319266289472579956054687500, -0.117927871644496917724609375000,
           -0.044052362442016601562500000000],
          [-0.305706858634948730468750000000, -0.012369632720947265625000000000,
           -0.066270038485527038574218750000],
          [ 0.177178949117660522460937500000,  0.223731324076652526855468750000,
           -0.018801212310791015625000000000]]]], requires_grad=True)

In [49]:
model_quant_bias2.conv.bias.detach().numpy()

array([-0.11298935,  0.32182777], dtype=float32)

In [37]:
input_fp32 = torch.rand(1, 1, 9, 9)

In [38]:
output_fp32 = model_quant_bias2(input_fp32)

In [39]:
output_fp32.shape

torch.Size([1, 2, 9, 9])

In [40]:
model_quant_bias2.conv.weight.shape

torch.Size([2, 1, 3, 3])

In [41]:
model_quant_bias2.conv.weight.detach().numpy()

array([[[[ 0.1727004  ,  0.03567028 , -0.009075443],
         [ 0.21966922 , -0.27335113 ,  0.18360329 ],
         [-0.04189861 ,  0.14325261 ,  0.2869426  ]]],


       [[[ 0.043319266, -0.11792787 , -0.044052362],
         [-0.30570686 , -0.012369633, -0.06627004 ],
         [ 0.17717895 ,  0.22373132 , -0.018801212]]]], dtype=float32)

In [43]:
weight_shape = model_quant_bias2.conv.weight.shape

In [44]:
weight_shape

torch.Size([2, 1, 3, 3])

In [42]:
input_fp32.shape

torch.Size([1, 1, 9, 9])

In [54]:
np.reshape(model_quant_bias2.conv.weight.detach().numpy(), (weight_shape[2], weight_shape[3], weight_shape[1], weight_shape[0]))

(3, 3, 1, 2)

In [56]:
np.reshape(input_fp32[0].detach().numpy(), (input_fp32.shape[2], input_fp32.shape[3], input_fp32.shape[1])).shape

(9, 9, 1)

In [59]:
output_manual = convolve(np.reshape(input_fp32[0].detach().numpy(), (input_fp32.shape[2], input_fp32.shape[3], input_fp32.shape[1])), np.reshape(model_quant_bias2.conv.weight.detach().numpy(), (weight_shape[2], weight_shape[3], weight_shape[1], weight_shape[0])), model_quant_bias2.conv.bias.detach().numpy())

In [68]:
output_manual.shape

(9, 9, 2)

In [81]:
np.reshape(output_manual, (output_manual.shape[2], output_manual.shape[0], output_manual.shape[1])).shape

(2, 9, 9)

In [80]:
output_fp32.detach().numpy()[0].shape

(2, 9, 9)

In [82]:
output_fp32.detach().numpy()[0] - np.reshape(output_manual, (output_manual.shape[2], output_manual.shape[0], output_manual.shape[1]))

array([[[ 0.008939431290664768, -0.47929776516463996 ,
          0.20150431767722532 , -0.2652077691154997  ,
         -0.06669273028810441 ,  0.11252881644845192 ,
          0.23876764493679614 , -0.02235389768274354 ,
         -0.3857006154869582  ],
        [-0.41669642515707817 ,  0.33482532251647745 ,
         -0.1313713876479965  ,  0.13927097917753484 ,
         -0.24526724250558618 ,  0.9736006857956743  ,
         -0.2730402102606153  ,  0.3857928696888635  ,
         -0.35428114243445985 ],
        [-0.043083355577246074, -0.41446137684353945 ,
          0.45651978132309196 , -0.3336694059115297  ,
          0.2285382187095249  , -0.29916033718485835 ,
          0.3345687276350354  , -0.2113290105302581  ,
          0.5110127803924256  ],
        [-0.5935926826608915  ,  0.3221946724873298  ,
         -0.2815999466519523  ,  0.3537310200971195  ,
         -0.5959343185042352  ,  0.5816943174923972  ,
         -0.6225939321115452  ,  0.833251545330324   ,
         -0.765243644