# Quantization Basics

---

## 1. Basic Technique

In [None]:
# libraries
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# 100 evenly spaced (deterministic, not random) sample points covering [-1, 1]
x = np.linspace(start=-1.0, stop=1.0, num=100)
print(x[:10])

[-1.         -0.97979798 -0.95959596 -0.93939394 -0.91919192 -0.8989899
 -0.87878788 -0.85858586 -0.83838384 -0.81818182]


In [None]:
# Quantization parameters.
# Real-valued range of the input that should be representable.
x_min, x_max = -1.0, 1.0
# Integer range of the quantized representation (256 distinct levels).
q_min, q_max = -128, 127
# Integer offset used by the (de)quantization formulas below.
zero_point = 0

# Scale: width of the real interval covered by one integer step.
scale = (x_max - x_min) / (q_max - q_min)
print("Scale:", scale)

Scale: 0.00784313725490196


In [None]:
# Quantize with the affine mapping q = round(x / scale) + zero_point.
# zero_point is an offset in the *integer* domain, so it is added after the
# division by scale; this makes the step the exact inverse of the
# dequantization formula y_hat = scale * (q - zero_point). (Subtracting it from
# x before scaling only gives the same result when zero_point == 0.)
q = (np.round(x / scale) + zero_point).astype(int)
# Saturate anything that rounded outside the representable integer range.
q = np.clip(q, q_min, q_max)
print("Scaled value:")
print(q[:10])

Scaled value:
[-128 -125 -122 -120 -117 -115 -112 -109 -107 -104]


In [None]:
# Dequantize: remove the integer offset, then rescale back to real values.
# The result only approximates the original input because of rounding/clipping.
y_hat = (q - zero_point) * scale
print("Reversed Scaled value:")
print(y_hat[:10])

Reversed Scaled value:
[-1.00392157 -0.98039216 -0.95686275 -0.94117647 -0.91764706 -0.90196078
 -0.87843137 -0.85490196 -0.83921569 -0.81568627]


## 2. NF4 quantization technique

In [None]:
# Lookup table of the 16 allowed levels (4 bits -> 16 codes). The levels are
# symmetric around zero and packed more densely near zero than near +/-1.
# NOTE(review): these look like simplified, illustrative levels rather than the
# published NF4 codebook -- confirm before using them outside this demo.
nf4_values = np.array([
    -1.0, -0.75, -0.5, -0.35,
    -0.2, -0.1, -0.05, -0.01,
    0.01, 0.05, 0.1, 0.2,
    0.35, 0.5, 0.75, 1.0,
])

# 100 evenly spaced sample points in [-1, 1] to be quantized.
y = np.linspace(start=-1.0, stop=1.0, num=100)
print(y[:10])

[-1.         -0.97979798 -0.95959596 -0.93939394 -0.91919192 -0.8989899
 -0.87878788 -0.85858586 -0.83838384 -0.81818182]


In [None]:
def quantize(x_vals, nf4_lut):
    """Map every input value to the nearest entry of a lookup table.

    Parameters
    ----------
    x_vals : array-like
        Values to quantize. Scalars, (nested) lists and arrays of any shape
        are accepted.
    nf4_lut : array-like
        Non-empty 1-D collection of the allowed quantization levels.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x_vals`` in which each element is the
        level from ``nf4_lut`` with the smallest absolute distance to the
        corresponding input (a NumPy scalar for scalar input). When two levels
        are equally close, the one that appears first in ``nf4_lut`` is chosen.

    Raises
    ------
    ValueError
        If ``nf4_lut`` is empty or not one-dimensional.
    """
    values = np.asarray(x_vals)
    levels = np.asarray(nf4_lut)
    if levels.ndim != 1 or levels.size == 0:
        raise ValueError("nf4_lut must be a non-empty 1-D sequence of levels")

    # Append a trailing axis so every value is broadcast against every level;
    # the distance table has shape values.shape + (len(levels),).
    distances = np.abs(values[..., None] - levels)
    # Index of the closest level for each value (first one wins on ties).
    nearest = np.argmin(distances, axis=-1)
    return levels[nearest]

In [None]:
# Snap every sample in y to its closest level in the lookup table.
x_nf4 = quantize(x_vals=y, nf4_lut=nf4_values)
print(x_nf4[:10])

[-1.   -1.   -1.   -1.   -1.   -1.   -1.   -0.75 -0.75 -0.75]
