<a href="https://colab.research.google.com/github/01PrathamS/Quantization/blob/main/notebooks/quantization_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Create a simple tensor with random values

In [4]:
import numpy as np

# Suppress scientific notation
# np.set_printoptions(suppress=True)

# Seed the generator so that "Restart Kernel -> Run All" always yields the same parameters
SEED = 42
rng = np.random.default_rng(SEED)

# Generate uniformly distributed parameters in [-50, 150)
params = rng.uniform(low=-50, high=150, size=20)

# Put the extremes and an exact zero at the beginning so they are easy to spot when debugging
params[0] = params.max() + 1
params[1] = params.min() - 1
params[2] = 0

# Round each number to the second decimal place
params = np.round(params, 2)

# Print the parameters
print(params)

[145.52 -42.68   0.    83.22  14.48 116.39 140.99 -18.53  38.84 -41.68
  69.48 105.9   34.99 127.08 144.52 -30.65  20.28  93.93 107.87 107.56]


# Define the quantization methods and quantize the parameters

In [9]:
def clamp(params_q: np.array, lower_bound: int, upper_bound: int) -> np.array:
  """Saturate `params_q` to the closed interval [lower_bound, upper_bound].

  The array is modified in place and the very same object is returned.
  Entries below `lower_bound` are raised to it first; afterwards entries
  above `upper_bound` are lowered to it.
  """
  too_small = params_q < lower_bound
  params_q[too_small] = lower_bound
  # The upper mask is computed after the lower fix-up, exactly like two sequential assignments
  too_large = params_q > upper_bound
  params_q[too_large] = upper_bound
  return params_q

def asymmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float, int]:
  """Quantize `params` to unsigned `bits`-bit integers (asymmetric scheme).

  The observed range [min(params), max(params)] is mapped linearly onto
  [0, 2**bits - 1]; results are clipped to that range with `clamp`
  (defined earlier in this notebook).

  Args:
    params: Array of values to quantize.
    bits: Width of the target integer range; must be at least 1.

  Returns:
    A tuple `(quantized, scale, zero)`: `quantized` is an int32 array,
    `scale` is the step between adjacent quantization levels, and `zero`
    is the integer zero point, i.e. the quantized value that 0.0 maps to.

  Raises:
    ValueError: If `bits` is smaller than 1.
  """
  if bits < 1:
    raise ValueError(f"bits must be >= 1, got {bits}")

  # Calculate the scale and zero point
  alpha = np.max(params)
  beta = np.min(params)
  lower_bound, upper_bound = 0, 2**bits - 1

  scale = (alpha - beta) / upper_bound
  if scale == 0:
    # All values are identical: a zero scale would turn the divisions below
    # into NaN/inf. Using |value| as the step keeps that value exactly
    # representable; 1.0 covers the all-zero case.
    scale = abs(alpha) if alpha != 0 else 1.0

  # The zero point is an integer by definition, so return it as one
  zero = int(-np.round(beta / scale))

  # Quantize the parameters
  quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
  return quantized, scale, zero

def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
  """Map asymmetrically quantized values back to real values.

  The zero point is subtracted from every entry and the result is
  multiplied by `scale`.
  """
  centered = params_q - zero
  return centered * scale

def sysmetric_dequantize(params_q: np.ndarray, scale: float) -> np.ndarray:
  """Map symmetrically quantized values back to real values.

  Args:
    params_q: Array of quantized values.
    scale: Step size that was used when quantizing.

  Returns:
    `params_q` multiplied element-wise by `scale`.
  """
  return params_q * scale

# Correctly spelled alias; the misspelled original name is kept so existing callers keep working.
symmetric_dequantize = sysmetric_dequantize

def symmetric_quantize(params_q: np.ndarray, bits: int) -> tuple[np.ndarray, float]:
  """Quantize values to signed `bits`-bit integers (symmetric scheme).

  The largest magnitude in the input is mapped to 2**(bits-1) - 1 and zero
  stays at zero; results are clipped to [-2**(bits-1), 2**(bits-1) - 1]
  with `clamp` (defined earlier in this notebook).

  Args:
    params_q: Array of *unquantized* values to quantize. (The parameter
      name is kept as-is so keyword callers keep working.)
    bits: Width of the target integer range; must be at least 2.

  Returns:
    A tuple `(quantized, scale)`: `quantized` is an int32 array and
    `scale` is the step between adjacent quantization levels.

  Raises:
    ValueError: If `bits` is smaller than 2.
  """
  if bits < 2:
    raise ValueError(f"bits must be >= 2, got {bits}")

  # Calculate the scale from the largest magnitude, so that inputs whose
  # most extreme value is negative are not clipped or sign-flipped
  alpha = np.max(np.abs(params_q))
  upper_bound = 2**(bits-1) - 1
  lower_bound = -2**(bits-1)
  scale = alpha / upper_bound
  if scale == 0:
    # All-zero input: any positive step represents it exactly; avoid dividing by zero
    scale = 1.0

  # Quantize the function's own argument (not a notebook-level global)
  quantized = clamp(np.round(params_q / scale), lower_bound, upper_bound).astype(np.int32)
  return quantized, scale

def quantization_error(params: np.array, params_q: np.array):
  """Return the mean squared error (MSE) between `params` and `params_q`."""
  residual = params - params_q
  squared = residual ** 2
  return np.mean(squared)

# Quantize the same parameters to 8 bits with both schemes
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, 8)
symmetric_q, symmetric_scale = symmetric_quantize(params, 8)

# One print per section; sep="\n" puts every item on its own line,
# and the trailing "" produces the blank separator line
print("Original:", np.round(params, 2), "", sep="\n")
print(f"Asymmetric scale: {asymmetric_scale}, zero: {asymmetric_zero}", asymmetric_q, "", sep="\n")
print(f"Symmetric Scale: {symmetric_scale}", symmetric_q, sep="\n")


Original:
[145.52 -42.68   0.    83.22  14.48 116.39 140.99 -18.53  38.84 -41.68
  69.48 105.9   34.99 127.08 144.52 -30.65  20.28  93.93 107.87 107.56]

Asymmetric scale: 0.7380392156862746, zero: 58.0
[255   0  58 171  78 216 249  33 111   2 152 201 105 230 254  16  85 185
 204 204]

Symmetric Scale: 1.1458267716535433
[127 -37   0  73  13 102 123 -16  34 -36  61  92  31 111 126 -27  18  82
  94  94]


In [11]:
# Dequantize the parameters back to floating point
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_symmetric = sysmetric_dequantize(symmetric_q, symmetric_scale)

# One print per section; sep="\n" puts every item on its own line,
# and the trailing " " reproduces the single-space separator line
print("Original:", np.round(params, 2), " ", sep="\n")
print("Dequantize Asymmetric:", np.round(params_deq_asymmetric, 2), " ", sep="\n")
print("Dequantize Symmetric:", np.round(params_deq_symmetric, 2), sep="\n")

Original:
[145.52 -42.68   0.    83.22  14.48 116.39 140.99 -18.53  38.84 -41.68
  69.48 105.9   34.99 127.08 144.52 -30.65  20.28  93.93 107.87 107.56]
 
Dequantize Asymmetric:
[145.39 -42.81   0.    83.4   14.76 116.61 140.97 -18.45  39.12 -41.33
  69.38 105.54  34.69 126.94 144.66 -31.    19.93  93.73 107.75 107.75]
 
Dequantize Symmetric:
[145.52 -42.4    0.    83.65  14.9  116.87 140.94 -18.33  38.96 -41.25
  69.9  105.42  35.52 127.19 144.37 -30.94  20.62  93.96 107.71 107.71]


In [14]:
# Report the quantization error (MSE against the original parameters) of each scheme
for label, restored in (
  ("Asymmetric error: ", params_deq_asymmetric),
  ("Symmetric error: ", params_deq_symmetric),
):
  # Right-align the label in a 20-character column so the numbers line up
  print(f'{label:>20}{np.round(quantization_error(params, restored), 2)}')

  Asymmetric error: 0.05
   Symmetric error: 0.09
