In [2]:
import numpy as np

In [4]:
np.set_printoptions(suppress=True)

In [6]:
params = np.random.uniform(low=-50, high=150, size=10000)
#introduce a outlier
params[-1]=1000
params = np.round(params,2)
print(params)

[  17.46   43.47   21.64 ...   79.29  149.12 1000.  ]


In [7]:
# clamp function
def clamp(params_q: np.array, lower_bound:int, upper_bound: int) -> np.array:
    params_q[params_q<lower_bound] = lower_bound
    params[params_q>upper_bound] = upper_bound
    return params_q

def asymmetric_quantization(params: np.array, bits: int) -> tuple[np.array,float,int]:
    alpha = np.max(params)
    beta = np.min(params)
    scale = (alpha-beta)/(2**bits-1)
    zero = -1*np.round(beta/scale)
    lower_bound, upper_bound = 0, 2**bits-1
    quantized = clamp(np.round(params/scale)+zero,lower_bound,upper_bound).astype(np.int32)
    # we convert to int32 as np.round even though converts the values to whole numbers but returns float64 datatype by default
    return quantized, scale, zero

def asymmetric_quantization_percentile(params: np.array, bits: int, percentile: float=99.99) -> tuple[np.array,float,int]:
    # setting float = 99.99 as default is to avoid outliers as alpha value in case no user provides no parameter for percentile
    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100-percentile)
    scale = (alpha-beta)/(2**bits-1)
    zero = -1*np.round(beta/scale)
    lower_bound, upper_bound = 0, 2**bits-1
    quantized = clamp(np.round(params/scale)+zero,lower_bound,upper_bound).astype(np.int32)
    return quantized, scale, zero

In [8]:
(asymmetric_q,asymmetric_scale, asymmetric_quantization_zero) = asymmetric_quantization(params,8)
(asymmetric_q_percentile, asymetric_scale_percentile, asymmetric_zero_percentile) = asymmetric_quantization_percentile(params,8)

In [None]:
print('Original:')
print(params)
print('')
print(f'Asymmetric scale: {asymmetric_scale}, zero: {asymmetric_quantization_zero}')
print(asymmetric_q)
print('')
print(f'Asymmetric Percentile scale: {asymetric_scale_percentile}, Percentile Zero: {asymmetric_zero_percentile}')
print(asymmetric_q_percentile)


Original:
[ 17.46  43.47  21.64 ...  79.29 149.12 255.  ]

Asymmetric (min-max) scale: 4.117607843137255, zero: 12.0
[ 16  23  17 ...  31  48 255]

Asymmetric Percentile scale: 0.7842941490172503, Percentile Zero: 64.0
[  86  119   92 ...  165  254 1339]
