In [3]:
import numpy as np 

# Print floats in plain decimal notation rather than scientific notation.
np.set_printoptions(suppress=True)

# Draw 30 sample values uniformly between -70 and 170.
params = np.random.uniform(low=-70, high=170, size=30)

# Plant three easy-to-spot values at the front: a new maximum, a new minimum
# and an exact zero. The assignments are sequential on purpose: the minimum is
# read only after slot 0 has already been overwritten.
params[0] = np.max(params) + 1
params[1] = np.min(params) - 1
params[2] = 0

# Keep two decimals so the printed tensor stays readable.
params = params.round(2)
print(params)


[164.11 -69.57   0.    40.16  35.85 -20.75 -68.57 163.11 -67.91  67.63
 -14.78 100.45   1.06 -55.9   84.01  35.39 141.49 144.9  -37.54 -52.9
 120.07  83.55 -25.71  68.58 -18.64 -43.07 -20.31  16.62 -12.39  48.58]


In [9]:
def clamp(params_q:np.ndarray,lower_bound:int,upper_bound:int) -> np.ndarray:
    """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

    Args:
        params_q: Values to limit.
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new array in which elements below ``lower_bound`` are replaced by
        ``lower_bound`` and elements above ``upper_bound`` by ``upper_bound``.
        The input array is left unmodified.
    """
    # np.clip allocates its result, so the caller's array is never written to
    # (boolean-mask assignment on the argument would modify it in place).
    return np.clip(params_q, lower_bound, upper_bound)

def Asymmetric_quantization(params:np.ndarray,bits:int) -> tuple[np.ndarray,float,int] :
    """Quantize ``params`` to unsigned ``bits``-bit codes using its min/max range.

    The interval ``[min(params), max(params)]`` is mapped linearly onto
    ``[0, 2**bits - 1]``. An approximation of the input can be recovered with
    ``scale * (quantized - zero)``.

    Args:
        params: Values to quantize.
        bits: Width of the integer codes; must be at least 1.

    Returns:
        ``(quantized, scale, zero)`` where ``quantized`` is an ``int32`` array
        of codes in ``[0, 2**bits - 1]``, ``scale`` is the real-valued step
        between adjacent codes, and ``zero`` is the integer zero-point.

    Raises:
        ValueError: If ``bits`` is smaller than 1, if ``params`` is empty, or
            if no integer zero-point can be derived (e.g. NaN values).
    """
    if bits < 1:
        raise ValueError("bits must be at least 1")

    alpha = np.max(params)
    beta = np.min(params)
    q_max = 2**bits - 1

    scale = (alpha - beta) / q_max
    if scale == 0:
        # Constant tensor: there is no range to spread over the codes. Any
        # non-zero step works; 1.0 keeps the reconstruction formula valid and
        # avoids dividing by zero below.
        scale = 1.0

    # Plain int so the zero-point matches the declared return type
    # (and never shows up as -0.0).
    zero = int(-np.round(beta / scale))

    quantized = np.clip(np.round(params / scale + zero), 0, q_max).astype(np.int32)
    return quantized, scale, zero
def Asymmetric_Dequantization(params_q:np.array,scale:float,zero:int)->np.array :
    """Map asymmetric integer codes back to approximate real values.

    The zero-point offset is removed first, then the result is multiplied by
    the scale factor.
    """
    shifted = params_q - zero
    return scale * shifted

def Symmetric_quantization(params:np.ndarray,bits:int) -> tuple[np.ndarray,float] :
    """Quantize ``params`` to signed ``bits``-bit codes, symmetric around zero.

    The largest absolute value in ``params`` is mapped to
    ``2**(bits-1) - 1``; real zero always maps to code 0, so no zero-point is
    needed. An approximation of the input can be recovered with
    ``scale * quantized``.

    Args:
        params: Values to quantize.
        bits: Width of the integer codes; must be at least 2 (one bit would
            leave no positive code to scale against).

    Returns:
        ``(quantized, scale)`` where ``quantized`` is an ``int32`` array of
        codes and ``scale`` is the real-valued step between adjacent codes.

    Raises:
        ValueError: If ``bits`` is smaller than 2 or ``params`` is empty.
    """
    if bits < 2:
        raise ValueError("bits must be at least 2 for symmetric quantization")

    alpha = np.max(np.abs(params))
    q_max = 2**(bits - 1) - 1

    scale = alpha / q_max
    if scale == 0:
        # All-zero tensor: any non-zero step reproduces it exactly; 1.0 avoids
        # dividing by zero below.
        scale = 1.0

    # Signed range of a `bits`-wide integer, e.g. [-128, 127] for 8 bits.
    lower_bound, upper_bound = -2**(bits - 1), q_max
    quantized = np.clip(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale
def Symmetric_Dequantization(params_q:np.array,scale:float)->np.array :
    """Map symmetric integer codes back to approximate real values.

    There is no zero-point in the symmetric scheme, so the codes are simply
    multiplied by the scale factor.
    """
    restored = params_q * scale
    return restored

def quant_error(params:np.array , params_q:np.array):
    """Return the mean squared difference between ``params`` and ``params_q``."""
    residual = params - params_q
    return np.mean(np.square(residual))

# Quantize the 30-value demo tensor to 8 bits with each scheme.
asymmetric_q, asymmetric_scale, asymmetric_zero = Asymmetric_quantization(params, 8)
symmetric_q, symmetric_scale = Symmetric_quantization(params, 8)

# Show the source values, then each scheme's parameters and integer codes.
print(f"Original : {np.round(params, 2)}")
print(f"")
print(f"Asymmetric scale : {asymmetric_scale} , zero : {asymmetric_zero}")
print(asymmetric_q)
print(f"")
print(f"Symmetric scale : {symmetric_scale}")
print(symmetric_q)


Original : [164.11 -69.57   0.    40.16  35.85 -20.75 -68.57 163.11 -67.91  67.63
 -14.78 100.45   1.06 -55.9   84.01  35.39 141.49 144.9  -37.54 -52.9
 120.07  83.55 -25.71  68.58 -18.64 -43.07 -20.31  16.62 -12.39  48.58]

Asymmetric scale : 0.9163921568627451 , zero : 76.0
[255   0  76 120 115  53   1 254   2 150  60 186  77  15 168 115 230 234
  35  18 207 167  48 151  56  29  54  94  62 129]

Symmetric scale : 1.292204724409449
[127 -54   0  31  28 -16 -53 126 -53  52 -11  78   1 -43  65  27 109 112
 -29 -41  93  65 -20  53 -14 -33 -16  13 -10  38]


In [14]:
# Reconstruct approximate floats from the integer codes of each scheme.
# Rounding to two decimals is for display only.
print(f"Asymmetric Dequantization : {np.round(Asymmetric_Dequantization(asymmetric_q, asymmetric_scale, asymmetric_zero), 2)}")
print(f"")
print(f"Symmetric Dequantization : {np.round(Symmetric_Dequantization(symmetric_q, symmetric_scale), 2)}")

Asymmetric Dequantization : [164.03 -69.65   0.    40.32  35.74 -21.08 -68.73 163.12 -67.81  67.81
 -14.66 100.8    0.92 -55.9   84.31  35.74 141.12 144.79 -37.57 -53.15
 120.05  83.39 -25.66  68.73 -18.33 -43.07 -20.16  16.5  -12.83  48.57]

Symmetric Dequantization : [164.11 -69.78   0.    40.06  36.18 -20.68 -68.49 162.82 -68.49  67.19
 -14.21 100.79   1.29 -55.56  83.99  34.89 140.85 144.73 -37.47 -52.98
 120.18  83.99 -25.84  68.49 -18.09 -42.64 -20.68  16.8  -12.92  49.1 ]


In [16]:
# Mean squared error of each scheme against the original tensor.
# NOTE: the reconstructed values are rounded to two decimals before the error
# is computed, so the figures include that rounding as well.
print(f"Asymmetric Error : {quant_error(params, np.round(Asymmetric_Dequantization(asymmetric_q, asymmetric_scale, asymmetric_zero), 2))}")
print(f"")
print(f"Symmetric Error : {quant_error(params, np.round(Symmetric_Dequantization(symmetric_q, symmetric_scale), 2))}")

Asymmetric Error : 0.039553333333333704

Symmetric Error : 0.11787666666666788


In [57]:
# A larger sample (20,000 values) drawn from the same -70..170 range.
params_2 = np.random.uniform(low=-70, high=170, size=20000)

# Replace the final entry with an extreme outlier far outside that range.
params_2[-1] = 1000

# Keep two decimals so the printed tensor stays readable.
params_2 = params_2.round(2)
print(params_2)

[ 131.61  123.37  130.12 ...   47.72   38.83 1000.  ]


In [58]:
def Asymmetric_quantization_percentile(params:np.ndarray,bits:int,percentile:float=99.99) -> tuple[np.ndarray,float,int]:
    """Asymmetric quantization whose range comes from percentiles, not min/max.

    The interval between the ``100 - percentile`` and ``percentile``
    percentiles of ``params`` is mapped linearly onto ``[0, 2**bits - 1]``.
    Values outside that interval saturate at the nearest end code, so a few
    extreme outliers no longer stretch the scale for everything else. An
    approximation of the in-range input can be recovered with
    ``scale * (quantized - zero)``.

    Args:
        params: Values to quantize.
        bits: Width of the integer codes; must be at least 1.
        percentile: Upper percentile of the range; the lower one is
            ``100 - percentile``. Intended to lie in ``(50, 100]``: a value
            below 50 puts the upper end below the lower end and produces a
            negative scale.

    Returns:
        ``(quantized, scale, zero)`` where ``quantized`` is an ``int32`` array
        of codes in ``[0, 2**bits - 1]``, ``scale`` is the real-valued step
        between adjacent codes, and ``zero`` is the integer zero-point.

    Raises:
        ValueError: If ``bits`` is smaller than 1, or if no integer zero-point
            can be derived (e.g. NaN values).
    """
    if bits < 1:
        raise ValueError("bits must be at least 1")

    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100 - percentile)
    q_max = 2**bits - 1

    scale = (alpha - beta) / q_max
    if scale == 0:
        # Both percentiles coincide (e.g. constant tensor or percentile=50):
        # fall back to a unit step instead of dividing by zero below.
        scale = 1.0

    # Plain int so the zero-point matches the declared return type
    # (and never shows up as -0.0).
    zero = int(-np.round(beta / scale))

    quantized = np.clip(np.round(params / scale + zero), 0, q_max).astype(np.int32)
    return quantized, scale, zero
    

In [59]:
# Min/max asymmetric quantization of the tensor that contains the outlier.
# NOTE: this rebinds asymmetric_q / asymmetric_scale / asymmetric_zero, which
# previously held the results for `params`.
asymmetric_q, asymmetric_scale, asymmetric_zero = Asymmetric_quantization(params_2, 8)

print(f"Original : {np.round(params_2, 2)}")
print(f"")
print(f"Asymmetric scale : {asymmetric_scale} , zero : {asymmetric_zero}")
print(asymmetric_q)

Original : [ 131.61  123.37  130.12 ...   47.72   38.83 1000.  ]

Asymmetric scale : 4.196078431372549 , zero : 17.0
[ 48  46  48 ...  28  26 255]


In [60]:
# Percentile-based asymmetric quantization of the same outlier tensor,
# using the function's default percentile.
asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile = (
    Asymmetric_quantization_percentile(params_2, 8)
)

print(f"Original : {np.round(params_2, 2)}")
print(f"")
print(f"Asymmetric Percentile scale : {asymmetric_scale_percentile} , Percentile zero : {asymmetric_zero_percentile}")
print(asymmetric_q_percentile)

Original : [ 131.61  123.37  130.12 ...   47.72   38.83 1000.  ]

Asymmetric Percentile scale : 0.9410980392156864 , Percentile zero : 74.0
[214 205 212 ... 125 115 255]


In [62]:
# Compare reconstruction error of the two range strategies. The last element
# (the planted outlier) is left out of both sides via [:-1].
print(f"Asymmetric Error : {np.round(quant_error(params_2[:-1], Asymmetric_Dequantization(asymmetric_q, asymmetric_scale, asymmetric_zero)[:-1]), 2)}")
print(f"Asymmetric Percentile Error : {np.round(quant_error(params_2[:-1], Asymmetric_Dequantization(asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile)[:-1]), 2)}")

Asymmetric Error : 1.47
Asymmetric Percentile Error : 0.07
