# Quantization using min-max
  * The entire range of the data (tensor) is mapped onto a specified target range.
  * The mapping is computed from the min and max values of the tensor.
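# Impact of outliers (using min-max)
  * When there are outliers in the data, they stretch the range of values.
  * This wastes memory: most of the quantization levels are spent on an almost empty range, so the ordinary values are represented less precisely. Min-max quantization is not robust to outliers.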

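# Quantization using percentiles
  * Percentiles divide the data into equal-sized segments; percentile-based quantization takes its range from a low and a high percentile instead of the absolute min and max. It penalizes only the outliers instead of the whole data.

  * Because extreme values do not affect the chosen range, this method is robust to outliers.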
  

In [1]:
import numpy as np

# Print floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Draw 10000 parameters uniformly from [-50, 150).
params = np.random.uniform(-50, 150, 10000)
# Overwrite the last parameter with a single extreme outlier.
params[len(params) - 1] = 1000
# Keep two decimals so the printed values stay readable.
params = params.round(2)
print(params)

[  69.    -35.22   81.74 ...   97.75   40.08 1000.  ]


# Define Quantization methods and Quantize

In [2]:
def clamp(params_q:np.ndarray,lower_bound:int,upper_bound:int)->np.ndarray:
  """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

  Elements below ``lower_bound`` become ``lower_bound`` and elements above
  ``upper_bound`` become ``upper_bound``; all other elements are unchanged.

  Args:
    params_q: Array of values to restrict.
    lower_bound: Smallest value allowed in the result.
    upper_bound: Largest value allowed in the result.

  Returns:
    A new array with the restricted values. The input array is not modified.
  """
  # np.clip allocates its result, so the caller's array is left untouched
  # (the previous masked assignments overwrote the argument in place).
  return np.clip(params_q,lower_bound,upper_bound)
# clamp restricts the quantized values to the range [lower_bound, upper_bound]

# Compare min-max and percentile range selection

In [3]:
# Min-Max
def asymmetric_quantization(params:np.ndarray,bits:int)->tuple[np.ndarray,float,float]:
  """Quantize ``params`` to unsigned ``bits``-bit codes using the min-max range.

  The minimum of ``params`` is mapped to code 0 and the maximum to code
  ``2**bits - 1``.

  Args:
    params: Array of values to quantize. Must not be empty.
    bits: Number of bits of the quantized representation (at least 1).

  Returns:
    A tuple ``(quantized, scale, zero)``: the ``int32`` codes, the step size
    between two neighbouring codes, and the zero point (a whole number stored
    as a float). ``(quantized - zero) * scale`` approximates ``params``.

  Raises:
    ValueError: If ``bits`` is smaller than 1.
  """
  if bits<1:
    raise ValueError(f'bits must be at least 1, got {bits}')
  alpha=np.max(params)
  beta=np.min(params)
  upper_bound=2**bits-1
  scale=(alpha-beta)/upper_bound
  if scale==0:
    # All values are identical: any non-zero step works, and 1.0 avoids the
    # division by zero that would otherwise produce NaN codes.
    scale=1.0
  zero=-1*np.round(beta/scale)
  # Codes outside [0, upper_bound] are saturated to the nearest bound.
  quantized=np.clip(np.round(params/scale+zero),0,upper_bound).astype(np.int32)
  return quantized,scale,zero

In [4]:
# Percentile
def asymmetric_quantization_percentile(params:np.ndarray,bits:int,percentile:float=99.99)->tuple[np.ndarray,float,float]:
  """Quantize ``params`` to unsigned ``bits``-bit codes using a percentile range.

  The range is taken from the ``percentile``-th and ``(100 - percentile)``-th
  percentiles of ``params`` instead of its maximum and minimum. Values outside
  that range saturate at code 0 or ``2**bits - 1``.

  Args:
    params: Array of values to quantize. Must not be empty.
    bits: Number of bits of the quantized representation (at least 1).
    percentile: Upper percentile of the range, in ``[0, 100]``. Values below
      50 put the upper end of the range under the lower end and give a
      negative scale.

  Returns:
    A tuple ``(quantized, scale, zero)``: the ``int32`` codes, the step size
    between two neighbouring codes, and the zero point (a whole number stored
    as a float). ``(quantized - zero) * scale`` approximates the values of
    ``params`` that lie inside the percentile range.

  Raises:
    ValueError: If ``bits`` is smaller than 1.
  """
  if bits<1:
    raise ValueError(f'bits must be at least 1, got {bits}')
  alpha=np.percentile(params,percentile)
  beta=np.percentile(params,100-percentile)
  upper_bound=2**bits-1
  scale=(alpha-beta)/upper_bound
  if scale==0:
    # Both percentiles coincide (constant data or percentile=50): any non-zero
    # step works, and 1.0 avoids the division by zero that would produce NaN.
    scale=1.0
  zero=-1*np.round(beta/scale)
  # Codes outside [0, upper_bound] are saturated to the nearest bound.
  quantized=np.clip(np.round(params/scale+zero),0,upper_bound).astype(np.int32)
  return quantized,scale,zero

In [5]:
def asymmetric_dequantize(params_q:np.array,scale:float,zero:int)->np.array:
  """Map quantized codes back to real values.

  Args:
    params_q: Array of quantized codes.
    scale: Step size between two neighbouring codes.
    zero: Zero point that is subtracted from every code.

  Returns:
    ``(params_q - zero) * scale`` computed element-wise.
  """
  shifted=params_q-zero
  return shifted*scale

def quantization_error(params:np.array,params_q:np.array):
  """Return the mean squared difference between two arrays.

  Args:
    params: Reference values.
    params_q: Values to compare against the reference.

  Returns:
    The mean of the squared element-wise differences.
  """
  residual=params-params_q
  return np.mean(np.square(residual))

In [6]:

# Quantize the same parameters to 8 bits with both range-selection methods.
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, 8)
asymmetric_q_percentile, asymmetric_scale_percentile, asymmetric_zero_percentile = (
    asymmetric_quantization_percentile(params, 8)
)

# Each section is printed as one group; the empty string yields the blank
# separator line between sections.
print('Original:', np.round(params, 2), '', sep='\n')
print(
    f'Asymmetric (min-max) scale: {asymmetric_scale}, zero: {asymmetric_zero}',
    asymmetric_q,
    '',
    sep='\n',
)
print(
    f'Asymmetric (percentile) scale: {asymmetric_scale_percentile}, zero: {asymmetric_zero_percentile}',
    asymmetric_q_percentile,
    sep='\n',
)

Original:
[  69.    -35.22   81.74 ...   97.75   40.08 1000.  ]

Asymmetric (min-max) scale: 4.11756862745098, zero: 12.0
[ 29   3  32 ...  36  22 255]

Asymmetric (percentile) scale: 0.7844509921545053, zero: 64.0
[152  19 168 ... 189 115 255]


In [7]:
# Map the quantized codes back to floating-point values, using the scale and
# zero point that belong to each quantization method.
params_deq_asymmetric = asymmetric_dequantize(
    asymmetric_q, asymmetric_scale, asymmetric_zero
)
params_deq_asymmetric_percentile = asymmetric_dequantize(
    asymmetric_q_percentile,
    asymmetric_scale_percentile,
    asymmetric_zero_percentile,
)

# Each section is printed as one group; the empty string yields the blank
# separator line between sections.
print('Original:', np.round(params, 2), '', sep='\n')
print('Dequantized (min-max):', np.round(params_deq_asymmetric, 2), '', sep='\n')
print('Dequantized (percentile):', np.round(params_deq_asymmetric_percentile, 2), sep='\n')

Original:
[  69.    -35.22   81.74 ...   97.75   40.08 1000.  ]

Dequantized (min-max):
[  70.    -37.06   82.35 ...   98.82   41.18 1000.57]

Dequantized (percentile):
[ 69.03 -35.3   81.58 ...  98.06  40.01 149.83]


# Evaluate Dequantization Error

In [8]:
# Mean squared error of each method, measured on every parameter except the
# last one (the [:-1] slices drop it from both arrays).
error_min_max = quantization_error(params[:-1], params_deq_asymmetric[:-1])
error_percentile = quantization_error(params[:-1], params_deq_asymmetric_percentile[:-1])

print(f'{"Error (min-max) excluding outlier: ":>40}{np.round(error_min_max,2)}')
print(f'{"Error (percentile) excluding outlier: ":>40}{np.round(error_percentile,2)}')

     Error (min-max) excluding outlier: 1.4
  Error (percentile) excluding outlier: 0.05


# Conclusion
  Percentile range selection reduces the quantization error on the non-outlier values (0.05 vs. 1.4 above). When the data contains outliers, it is a better choice than min-max range selection.