In [1]:
import numpy as np

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same sample.
SEED = 42
rng = np.random.default_rng(SEED)

# Draw 20 values uniformly from [-50, 150).
params = rng.uniform(low=-50, high=150, size=20)

# Overwrite the first three entries with known values: a new maximum,
# a new minimum, and an exact zero.
params[0] = params.max() + 1
params[1] = params.min() - 1
params[2] = 0

# Keep two decimals so the printed arrays stay readable.
params = np.round(params, 2)

print(params)

[150.42 -44.35   0.   145.14 -30.22  83.57  43.63   6.69 149.42  26.71
 -29.35   6.68 -20.03 -43.35 133.5   70.76  24.14  -9.83 127.01 100.04]


# Now let's define the quantization functions

In [2]:
# first define clamp

def clamp(params_q: np.array, lower_bound: int, upper_bound: int) -> np.array:
    """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

    Values below ``lower_bound`` become ``lower_bound`` and values above
    ``upper_bound`` become ``upper_bound``; everything else is unchanged.

    Args:
        params_q: Values to limit.
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new array with the limited values. The input is not modified.
    """
    # np.clip allocates its result, so the caller's array is left untouched.
    return np.clip(params_q, lower_bound, upper_bound)

def asymmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float, int]:
    """Quantize ``params`` to unsigned ``bits``-bit codes using the min-max range.

    The real interval ``[min(params), max(params)]`` is mapped affinely onto
    the integer interval ``[0, 2**bits - 1]``.

    Args:
        params: Real-valued parameters to quantize.
        bits: Width of the target integer representation; must be >= 1.

    Returns:
        A ``(quantized, scale, zero)`` tuple: the ``np.int32`` codes, the real
        step between adjacent codes, and the integer zero-point that is added
        after scaling.

    Raises:
        ValueError: If ``bits`` is smaller than 1, or if ``params`` contains
            NaN (the zero-point then cannot be converted to an integer).
    """
    if bits < 1:
        raise ValueError(f"bits must be >= 1, got {bits}")

    alpha = np.max(params)
    beta = np.min(params)
    lower_bound, upper_bound = 0, 2**bits - 1

    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Degenerate range (all values equal): dividing by it would give NaN.
        # Use a step that lets the single value round-trip exactly instead.
        scale = abs(alpha) if alpha != 0 else 1.0

    # The zero-point is an integer code, so return it as one (as annotated).
    zero = int(-1 * np.round(beta / scale))

    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)

    return quantized, scale, zero

def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    """Map asymmetric integer codes back to real values.

    Subtracts the zero-point from each code, then multiplies by the step size.
    """
    offset_removed = params_q - zero
    return offset_removed * scale

def symmetric_dequantize(params_q: np.array, scale: float) -> np.array:
    """Map symmetric integer codes back to real values.

    There is no zero-point in the symmetric scheme, so each code is simply
    multiplied by the step size.
    """
    rescaled = params_q * scale
    return rescaled

def symmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float]:
    """Quantize ``params`` to signed ``bits``-bit codes centred on zero.

    The step size is chosen so that the largest absolute value in ``params``
    maps to ``2**(bits-1) - 1``; real 0.0 always maps to code 0.

    Args:
        params: Real-valued parameters to quantize.
        bits: Width of the target integer representation; must be >= 2 so
            that there is at least one positive code.

    Returns:
        A ``(quantized, scale)`` tuple: the ``np.int32`` codes and the real
        step between adjacent codes.

    Raises:
        ValueError: If ``bits`` is smaller than 2.
    """
    if bits < 2:
        raise ValueError(f"bits must be >= 2, got {bits}")

    lower_bound = -2**(bits-1)
    upper_bound = 2**(bits-1)-1

    # Calculate the scale from the largest magnitude.
    alpha = np.max(np.abs(params))
    scale = alpha / upper_bound
    if scale == 0:
        # All-zero input: any positive step reproduces it exactly; avoid 0/0.
        scale = 1.0

    # Quantize the parameters.
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

def quantization_error(params: np.array, params_q: np.array):
    """Return the mean squared error between ``params`` and ``params_q``."""
    residual = params - params_q
    return np.mean(np.square(residual))

# Quantize the same parameter vector to 8 bits with both schemes.
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, 8)
symmetric_q, symmetric_scale = symmetric_quantization(params, 8)

# Print the input, then each scheme's scale (and zero-point) with its codes.
print('Original:', np.round(params, 2), '', sep='\n')
print(f'Asymmetric scale: {asymmetric_scale}, zero: {asymmetric_zero}', asymmetric_q, '', sep='\n')
print(f'Symmetric scale: {symmetric_scale}', symmetric_q, sep='\n')



Original:
[150.42 -44.35   0.   145.14 -30.22  83.57  43.63   6.69 149.42  26.71
 -29.35   6.68 -20.03 -43.35 133.5   70.76  24.14  -9.83 127.01 100.04]

Asymmetric scale: 0.7638039215686274, zero: 58.0
[255   0  58 248  18 167 115  67 254  93  20  67  32   1 233 151  90  45
 224 189]

Symmetric scale: 1.1844094488188976
[127 -37   0 123 -26  71  37   6 126  23 -25   6 -17 -37 113  60  20  -8
 107  84]


# Now let's dequantize the quantized arrays

In [3]:
# Reconstruct real values from the integer codes of each scheme.
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_symmetric = symmetric_dequantize(symmetric_q, symmetric_scale)

# Show the original next to both reconstructions, rounded to two decimals.
print('Original:', np.round(params, 2), '', sep='\n')
print('Dequantize Asymmetric:', np.round(params_deq_asymmetric, 2), '', sep='\n')
print('Dequantize Symmetric:', np.round(params_deq_symmetric, 2), sep='\n')

Original:
[150.42 -44.35   0.   145.14 -30.22  83.57  43.63   6.69 149.42  26.71
 -29.35   6.68 -20.03 -43.35 133.5   70.76  24.14  -9.83 127.01 100.04]

Dequantize Asymmetric:
[150.47 -44.3    0.   145.12 -30.55  83.25  43.54   6.87 149.71  26.73
 -29.02   6.87 -19.86 -43.54 133.67  71.03  24.44  -9.93 126.79 100.06]

Dequantize Symmetric:
[150.42 -43.82   0.   145.68 -30.79  84.09  43.82   7.11 149.24  27.24
 -29.61   7.11 -20.13 -43.82 133.84  71.06  23.69  -9.48 126.73  99.49]


In [4]:
# Report the mean squared reconstruction error of each scheme.
for label, deq in (
    ("Asymmetric error: ", params_deq_asymmetric),
    ("Symmetric error: ", params_deq_symmetric),
):
    print(f'{label:>20}{np.round(quantization_error(params, deq), 2)}')


  Asymmetric error: 0.04
   Symmetric error: 0.16


# Now let's compare the min-max and percentile range-selection strategies

In [11]:
# Inject an outlier: overwrite the last parameter with a value far outside the
# range the other values were drawn from. This mutates `params` in place, so
# any earlier cell re-run after this one will also see the outlier.
params[-1] = 1000

def clamp(params_q: np.array, lower_bound: int, upper_bound: int) -> np.array:
    """Limit every element of ``params_q`` to ``[lower_bound, upper_bound]``.

    Values below ``lower_bound`` become ``lower_bound`` and values above
    ``upper_bound`` become ``upper_bound``; everything else is unchanged.

    Args:
        params_q: Values to limit.
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new array with the limited values. The input is not modified.
    """
    # NOTE(review): a function with this name is also defined in an earlier
    # cell; this definition shadows it.
    # np.clip allocates its result, so the caller's array is left untouched.
    return np.clip(params_q, lower_bound, upper_bound)

def asymmetric_quantization(params: np.array, bits: int) -> tuple[np.array, float, int]:
    """Quantize ``params`` to unsigned ``bits``-bit codes using the min-max range.

    The real interval ``[min(params), max(params)]`` is mapped affinely onto
    the integer interval ``[0, 2**bits - 1]``.

    Args:
        params: Real-valued parameters to quantize.
        bits: Width of the target integer representation; must be >= 1.

    Returns:
        A ``(quantized, scale, zero)`` tuple: the ``np.int32`` codes, the real
        step between adjacent codes, and the integer zero-point that is added
        after scaling.

    Raises:
        ValueError: If ``bits`` is smaller than 1, or if ``params`` contains
            NaN (the zero-point then cannot be converted to an integer).
    """
    # NOTE(review): a function with this name is also defined in an earlier
    # cell; this definition shadows it.
    if bits < 1:
        raise ValueError(f"bits must be >= 1, got {bits}")

    alpha = np.max(params)
    beta = np.min(params)
    lower_bound, upper_bound = 0, 2**bits - 1

    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Degenerate range (all values equal): dividing by it would give NaN.
        # Use a step that lets the single value round-trip exactly instead.
        scale = abs(alpha) if alpha != 0 else 1.0

    # The zero-point is an integer code, so return it as one (as annotated).
    zero = int(-1 * np.round(beta / scale))
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def asymmetric_quantization_percentile(params: np.array, bits: int, percentile: float = 99.99) -> tuple[np.array, float, int]:
    """Quantize ``params`` to unsigned ``bits``-bit codes using a percentile range.

    Instead of the raw minimum and maximum, the range is bounded by the
    ``100 - percentile`` and ``percentile`` percentiles of ``params``. Values
    that fall outside that range are clamped to code ``0`` or ``2**bits - 1``.

    Args:
        params: Real-valued parameters to quantize.
        bits: Width of the target integer representation; must be >= 1.
        percentile: Upper percentile of the range, in ``[50, 100]``; the lower
            bound is taken at ``100 - percentile``.

    Returns:
        A ``(quantized, scale, zero)`` tuple: the ``np.int32`` codes, the real
        step between adjacent codes, and the integer zero-point that is added
        after scaling.

    Raises:
        ValueError: If ``bits`` is smaller than 1, if ``percentile`` is
            outside ``[50, 100]``, or if ``params`` contains NaN (the
            zero-point then cannot be converted to an integer).
    """
    if bits < 1:
        raise ValueError(f"bits must be >= 1, got {bits}")
    if not 50 <= percentile <= 100:
        # Below 50 the two bounds would swap and the scale would turn negative.
        raise ValueError(f"percentile must be in [50, 100], got {percentile}")

    # Find the percentile values that bound the range.
    alpha = np.percentile(params, percentile)
    beta = np.percentile(params, 100 - percentile)
    lower_bound, upper_bound = 0, 2**bits - 1

    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Degenerate range (both bounds equal): dividing by it would give NaN.
        scale = abs(alpha) if alpha != 0 else 1.0

    # The zero-point is an integer code, so return it as one (as annotated).
    zero = int(-1 * np.round(beta / scale))
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero


def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    """Map asymmetric integer codes back to real values.

    Subtracts the zero-point from each code, then multiplies by the step size.
    """
    # NOTE(review): a function with this name is also defined in an earlier
    # cell; this definition shadows it.
    offset_removed = params_q - zero
    return offset_removed * scale

def quantization_error(params: np.array, params_q: np.array):
    """Return the mean squared error between ``params`` and ``params_q``."""
    # NOTE(review): a function with this name is also defined in an earlier
    # cell; this definition shadows it.
    residual = params - params_q
    return np.mean(np.square(residual))

# Quantize the outlier-containing vector with both range-selection strategies.
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, 8)
(
    asymmetric_q_percentile,
    asymmetric_scale_percentile,
    asymmetric_zero_percentile,
) = asymmetric_quantization_percentile(params, 8)

# Print the input, then each strategy's scale and zero-point with its codes.
print('Original:', np.round(params, 2), '', sep='\n')
print(f'Asymmetric (min-max) scale: {asymmetric_scale}, zero: {asymmetric_zero}', asymmetric_q, '', sep='\n')
print(
    f'Asymmetric (percentile) scale: {asymmetric_scale_percentile}, zero: {asymmetric_zero_percentile}',
    asymmetric_q_percentile,
    sep='\n',
)

Original:
[ 150.42  -44.35    0.    145.14  -30.22   83.57   43.63    6.69  149.42
   26.71  -29.35    6.68  -20.03  -43.35  133.5    70.76   24.14   -9.83
  127.01 1000.  ]

Asymmetric (min-max) scale: 4.095490196078431, zero: 11.0
[ 48   0  11  46   4  31  22  13  47  18   4  13   6   0  44  28  17   9
  42 255]

Asymmetric (percentile) scale: 4.089152541176461, zero: 11.0
[ 48   0  11  46   4  31  22  13  48  18   4  13   6   0  44  28  17   9
  42 255]


In [12]:
# Reconstruct real values from the codes produced by each strategy.
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_asymmetric_percentile = asymmetric_dequantize(
    asymmetric_q_percentile,
    asymmetric_scale_percentile,
    asymmetric_zero_percentile,
)

# Show the original next to both reconstructions, rounded to two decimals.
print('Original:', np.round(params, 2), '', sep='\n')
print('Dequantized (min-max):', np.round(params_deq_asymmetric, 2), '', sep='\n')
print('Dequantized (percentile):', np.round(params_deq_asymmetric_percentile, 2), sep='\n')

Original:
[ 150.42  -44.35    0.    145.14  -30.22   83.57   43.63    6.69  149.42
   26.71  -29.35    6.68  -20.03  -43.35  133.5    70.76   24.14   -9.83
  127.01 1000.  ]

Dequantized (min-max):
[151.53 -45.05   0.   143.34 -28.67  81.91  45.05   8.19 147.44  28.67
 -28.67   8.19 -20.48 -45.05 135.15  69.62  24.57  -8.19 126.96 999.3 ]

Dequantized (percentile):
[151.3  -44.98   0.   143.12 -28.62  81.78  44.98   8.18 151.3   28.62
 -28.62   8.18 -20.45 -44.98 134.94  69.52  24.53  -8.18 126.76 997.75]


In [13]:
# Mean squared error of each strategy over every element except the last one
# (the injected outlier).
for label, deq in (
    ("Error (min-max) excluding outlier: ", params_deq_asymmetric),
    ("Error (percentile) excluding outlier: ", params_deq_asymmetric_percentile),
):
    print(f'{label:>40}{np.round(quantization_error(params[:-1], deq[:-1]), 2)}')

     Error (min-max) excluding outlier: 1.84
  Error (percentile) excluding outlier: 1.81


Error including the outlier

In [15]:
# Mean squared error of each strategy over all elements, outlier included.
for label, deq in (
    ("Error (min-max) including outlier: ", params_deq_asymmetric),
    ("Error (percentile) including outlier: ", params_deq_asymmetric_percentile),
):
    print(f'{label:>40}{np.round(quantization_error(params, deq), 2)}')

     Error (min-max) including outlier: 1.77
  Error (percentile) including outlier: 1.97
