# Quantization

Lecture 19 | CMU ANLP Fall 2025 | Instructor: Sean Welleck

This notebook shows basic quantization concepts.


#### Absmax quantization

In [None]:
import numpy as np
x = np.array([-3.0, 1.0, 2.0, 4.0])

if np.max(np.abs(x)) != 0:
    scale = 127.0 / np.max(np.abs(x))
else:
    scale = 1.0

x_quantized = np.round(x * scale).astype(np.int8)
x_dequantized = x_quantized.astype(np.float32) / scale

print("Original array:    ", x)
print("Quantized array:   ", x_quantized)
print("Dequantized array: ", x_dequantized)

mse = np.mean((x - x_dequantized) ** 2)
print("Mean Squared Error:", mse)


Original array:     [-3.  1.  2.  4.]
Quantized array:    [-95  32  64 127]
Dequantized array:  [-2.99212598  1.00787402  2.01574803  4.        ]
Mean Squared Error: 9.300018600037166e-05


In [22]:
def absmax_quantize(x):
    if np.max(np.abs(x)) != 0:
        scale = 127.0 / np.max(np.abs(x))
    else:
        scale = 1.0

    x_quantized = np.round(x * scale).astype(np.int8)
    x_dequantized = x_quantized.astype(np.float32) / scale

    return x_quantized, x_dequantized

def analyze(x, x_quantized, x_dequantized):
    print("Original array:    ", x)
    print("Quantized array:   ", x_quantized)
    print("Dequantized array: ", x_dequantized)
    mse = np.mean((x - x_dequantized) ** 2)
    print(f"Mean Squared Error: {mse:.6f}")

In [23]:
x = np.array([-3.112, 1.567, 2.789, 4.345])
x_quantized, x_dequantized = absmax_quantize(x)
analyze(x, x_quantized, x_dequantized)


Original array:     [-3.112  1.567  2.789  4.345]
Quantized array:    [-91  46  82 127]
Dequantized array:  [-3.11334646  1.57377953  2.80543307  4.345     ]
Mean Squared Error: 0.000079


### Zero-point quantization

In [None]:
import numpy as np
x = np.array([-3.0, 1.0, 2.0, 4.0])
x_min = np.min(x)
x_max = np.max(x)

qmin, qmax = -128, 127

if x_max - x_min != 0:
    scale = (qmax - qmin) / (x_max - x_min)
else:
    scale = 1.0

zero_point = np.round(qmin - x_min * scale)
zero_point = np.clip(zero_point, qmin, qmax).astype(np.int8)

x_quantized = np.round(x * scale + zero_point)
x_quantized = np.clip(x_quantized, qmin, qmax).astype(np.int8)

x_dequantized = (x_quantized.astype(np.float32) - zero_point) / scale

print("Original array:    ", x)
print("Quantized array:   ", x_quantized)
print("Dequantized array: ", x_dequantized)
print("Scale", scale)
print("Zero-point", zero_point)
mse = np.mean((x - x_dequantized) ** 2)
print("Mean Squared Error:", mse)

Original array:     [-3.  1.  2.  4.]
Quantized array:    [-128   17   54  127]
Dequantized array:  [-2.99215686  0.98823529  2.00392157  4.00784314]
Scale 36.42857142857143
Zero-point -19
Mean Squared Error: 6.92041522491351e-05


In [25]:
def zero_point_quantize(x):
    x_min = np.min(x)
    x_max = np.max(x)

    qmin, qmax = -128, 127

    if x_max - x_min != 0:
        scale = (qmax - qmin) / (x_max - x_min)
    else:
        scale = 1.0
        
    zero_point = np.round(qmin - x_min * scale)
    zero_point = np.clip(zero_point, qmin, qmax).astype(np.int8)
    x_quantized = np.round(x * scale + zero_point)
    x_quantized = np.clip(x_quantized, qmin, qmax).astype(np.int8)
    x_dequantized = (x_quantized.astype(np.float32) - zero_point) / scale
    return x_quantized, x_dequantized

### Outlier example

In [None]:
x_outlier = np.array([-0.3, 0.1, 0.2, 0.4, -0.3, 0.1, 0.2, 0.4, -0.3, 0.1, 0.2, 100.0])
x_non_outlier = np.array([-0.3, 0.1, 0.2, 0.4, -0.3, 0.1, 0.2, 0.4, -0.3, 0.1, 0.2, 0.4])

# Run each kind of quantization on outliers and non-outliers
for x in [x_outlier, x_non_outlier]:
    print("\nWith outlier:" if x is x_outlier else "\nWithout outlier:")

    print("\nAbsmax Quantization:")
    x_quantized, x_dequantized = absmax_quantize(x)
    analyze(x, x_quantized, x_dequantized)

    print("\nZero-point Quantization:")
    x_quantized, x_dequantized = zero_point_quantize(x)
    analyze(x, x_quantized, x_dequantized)


With outlier:

Absmax Quantization:
Original array:     [ -0.3   0.1   0.2   0.4  -0.3   0.1   0.2   0.4  -0.3   0.1   0.2 100. ]
Quantized array:    [  0   0   0   1   0   0   0   1   0   0   0 127]
Dequantized array:  [  0.           0.           0.           0.78740157   0.
   0.           0.           0.78740157   0.           0.
   0.         100.        ]
Mean Squared Error: 0.060013

Zero-point Quantization:
Original array:     [ -0.3   0.1   0.2   0.4  -0.3   0.1   0.2   0.4  -0.3   0.1   0.2 100. ]
Quantized array:    [-128 -127 -126 -126 -128 -127 -126 -126 -128 -127 -126  127]
Dequantized array:  [-0.39333333  0.          0.39333333  0.39333333 -0.39333333  0.
  0.39333333  0.39333333 -0.39333333  0.          0.39333333 99.90666667]
Mean Squared Error: 0.014756

Without outlier:

Absmax Quantization:
Original array:     [-0.3  0.1  0.2  0.4 -0.3  0.1  0.2  0.4 -0.3  0.1  0.2  0.4]
Quantized array:    [-95  32  64 127 -95  32  64 127 -95  32  64 127]
Dequantized array:  [-0.

In [None]:
# return the scale, which will be useful for the matrix multiplication example
def absmax_quantize(x):
    if np.max(np.abs(x)) != 0:
        scale = 127.0 / np.max(np.abs(x))
    else:
        scale = 1.0

    x_quantized = np.round(x * scale).astype(np.int8)
    return x_quantized, scale

def dequantize(x_quantized, scale):
    x_dequantized = x_quantized.astype(np.float32) / scale
    return x_dequantized

### Matrix multiplication example

In [None]:
# example for matrix multiplication 

outliers = True

h = 4
o = 3
T = 8

W = np.random.randn(h, o).astype(np.float16)
X = np.random.randn(T, h).astype(np.float16)

if outliers:
    # Introduce outlier features in X
    X[:, 1] *= 100.0

W_quantized, W_scale = absmax_quantize(W)
X_quantized, X_scale = absmax_quantize(X)

matmul_scale = W_scale * X_scale

# In reality, we would use a int8xint8 -> int32 kernel
Y = X_quantized.astype(np.int32).dot(W_quantized.astype(np.int32))
Y_dequantized = Y.astype(np.float32) / matmul_scale

print("W (Original):\n", W)
print("X (Original):\n", X)
print("W_quantized:\n", W_quantized)
print("X_quantized:\n", X_quantized)
print("Y (Quantized MatMul Result before scaling):\n", Y)

print("Dequantized MatMul Result:\n", Y_dequantized)
# Compare with float16 matmul
Y_float16 = np.matmul(X.astype(np.float16), W.astype(np.float16))
print("Float16 MatMul Result:\n", Y_float16)
mse = np.mean((Y_float16 - Y_dequantized) ** 2)
print("Mean Squared Error between float16 and quantized matmul:", mse)


W (Original):
 [[ 1.05     0.746    0.05606]
 [ 0.5435  -0.7725   0.8413 ]
 [ 1.889    0.2646   2.355  ]
 [-0.1483  -1.635    0.542  ]]
X (Original):
 [[-2.084e+00 -2.498e+01 -1.690e+00 -7.336e-02]
 [-1.869e-01 -3.581e+01 -1.252e+00 -5.234e-01]
 [-4.277e-01  2.052e+01 -6.880e-01  3.694e-01]
 [ 1.839e+00  1.438e+02  1.112e+00 -1.826e+00]
 [ 1.476e+00 -2.453e+01  1.535e+00 -6.396e-01]
 [-1.732e+00 -6.819e+01  1.178e+00 -2.742e-01]
 [ 1.381e+00 -6.009e+01  3.245e-01  9.492e-01]
 [ 1.128e+00 -7.227e-01  1.777e+00 -1.615e+00]]
W_quantized:
 [[ 57  40   3]
 [ 29 -42  45]
 [102  14 127]
 [ -8 -88  29]]
X_quantized:
 [[ -2 -22  -1   0]
 [  0 -32  -1   0]
 [  0  18  -1   0]
 [  2 127   1  -2]
 [  1 -22   1  -1]
 [ -2 -60   1   0]
 [  1 -53   0   1]
 [  1  -1   2  -1]]
Y (Quantized MatMul Result before scaling):
 [[ -854   830 -1123]
 [-1030  1330 -1567]
 [  420  -770   683]
 [ 3915 -5064  5790]
 [ -471  1066  -889]
 [-1752  2454 -2579]
 [-1488  2178 -2353]
 [  240   198   183]]
Dequantized MatM