Creating a Simple Tensor with Random Items

In [2]:
import numpy as np

In [None]:
# scientic notation is suppressed by below code
# meaning the decimal palce will stay fixed and conversion 
# to exponential(power of 10-Scientific Notation) form will not be there, this will help us analyse the results better

np.set_printoptions(suppress=True)

In [13]:
#generating a random floating point array
params = np.random.uniform(low=-50,high=150,size=20)
params

array([-12.30109077, 126.68785963,  -0.4939466 , 135.85372037,
       -17.51774727, -16.11701896, 102.23461091, -33.5079855 ,
       111.25202791, 121.4723397 ,  93.89520017,  36.7436026 ,
        85.58463402, -27.62545839, -29.72305535, -25.13061787,
        20.13014606,  36.91993243,  45.82819097,  89.02619806])

In [16]:
# adjusting the most important numbers in such a way that we can easily observe the effect of quantization 
params[0] = params.max()-1
params[1] = params.min()-1
params[2] = 0


In [21]:
params

array([134.85372037, -34.5079855 ,   0.        , 135.85372037,
       -17.51774727, -16.11701896, 102.23461091, -33.5079855 ,
       111.25202791, 121.4723397 ,  93.89520017,  36.7436026 ,
        85.58463402, -27.62545839, -29.72305535, -25.13061787,
        20.13014606,  36.91993243,  45.82819097,  89.02619806])

Defining the quantization menthods

In [None]:
# clamp function
def clamp(params_q: np.array, lower_bound:int, upper_bound: int) -> np.array:
    params_q[params_q<lower_bound] = lower_bound
    params[params_q>upper_bound] = upper_bound
    return params_q

def asymmetric_quantization(params: np.array, bits: int) -> tuple[np.array,float,int]:
    alpha = np.max(params)
    beta = np.min(params)
    scale = (alpha-beta)/(2**bits-1)
    zero = -1*np.round(beta/scale)
    lower_bound = 0
    upper_bound = 2**bits-1
    quantized = clamp(np.round(params/scale)+zero,lower_bound,upper_bound).astype(np.int32)
    # we convert to int32 as np.round even though converts the values to whole numbers but returns float64 datatype by default
    return quantized, scale, zero

def asymmetric_dequantization(params_q: np.array, scale: float, zero: int) -> np.array:
    return (params_q-zero)*scale

def symmetric_dequantization(params_q: np.array, scale: float) -> np.array:
    return params_q*scale

def symmetric_quantization(params:np.array,bits:int)->tuple[np.array,float]:
    alpha = np.max(abs(params))
    scale = alpha/2**(bits-1)-1
    lower_bound = -2**(bits-1)
    upper_bound = 2**(bits-1)-1
    quantize = clamp(np.round(params/scale),lower_bound,upper_bound).astype(np.int32)
    return quantize,scale

def quantization_error(params:np.array, params_q:np.array):
    return np.mean((params-params_q)**2)


In [23]:
asymmetric_q,asymmetric_scale, asymmetric_quantization_zero = asymmetric_quantization(params,8)
symmetric_q,symmetric_scale = symmetric_quantization(params,8)

In [24]:
asymmetric_q

array([254,   0,  52, 255,  26,  28, 205,   2, 219, 234, 193, 107, 180,
        11,   8,  14,  82, 107, 121, 185])

In [25]:
symmetric_q

array([2198, -128,    0, 2214, -128, -128, 1666, -128, 1813, 1980, 1530,
        599, 1395, -128, -128, -128,  328,  602,  747, 1451])