# Create a simple tensor with random items

In [None]:
import numpy as np

# Suppress scientific notation
np.set_printoptions(suppress=True)

# Use a seeded generator so that "Restart Kernel -> Run All" always
# produces the same parameters (and therefore the same downstream numbers)
SEED = 42
rng = np.random.default_rng(SEED)

# Generate uniformly distributed parameters in [-50, 150)
params = rng.uniform(low=-50, high=150, size=20)

# Make sure important values are at the beginning for better debugging:
# index 0 holds the maximum, index 1 the minimum, index 2 an exact zero
params[0] = params.max() + 1
params[1] = params.min() - 1
params[2] = 0

# Round each number to the second decimal place
params = np.round(params, 2)

# Print the parameters
print(params)

[144.98 -43.02   0.   -15.28  28.86 127.93  29.88 -42.02  96.57 103.79
  -6.5  106.55  -4.28  -5.7  -19.36  -1.78  73.15 -23.7  -21.38 143.98]


# Define the quantization methods and quantize

In [None]:
def clamp(params_q: np.ndarray, lower_bound: int, upper_bound: int) -> np.ndarray:
    """Limit every element of ``params_q`` to the range [lower_bound, upper_bound].

    Args:
        params_q: Values to clamp.
        lower_bound: Smallest value allowed in the result.
        upper_bound: Largest value allowed in the result.

    Returns:
        A new array with the same shape as ``params_q``; elements below
        ``lower_bound`` become ``lower_bound`` and elements above
        ``upper_bound`` become ``upper_bound``.
    """
    # np.clip returns a fresh array, so the caller's data is never
    # overwritten as a side effect of clamping.
    return np.clip(params_q, lower_bound, upper_bound)

In [None]:
def asymmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float, int]:
    """Quantize ``params`` to unsigned ``bits``-bit integers with a scale and zero point.

    The observed range [min(params), max(params)] is mapped onto
    [0, 2**bits - 1].

    Args:
        params: Real-valued parameters to quantize (must be non-empty).
        bits: Number of bits of the target integer range.

    Returns:
        A tuple ``(quantized, scale, zero)`` where ``quantized`` is an int32
        array with values in [0, 2**bits - 1], ``scale`` is the real-valued
        step between two adjacent integer levels and ``zero`` is the integer
        zero point. The zero point itself is not clamped to the integer range.
    """
    # Calculate the scale and zero point
    alpha = np.max(params)
    beta = np.min(params)
    lower_bound, upper_bound = 0, 2**bits - 1
    scale = (alpha - beta) / upper_bound
    if scale == 0:
        # Constant input: the range is empty, so dividing by the scale would
        # yield NaN/inf. Using |value| as the step (or 1.0 for all zeros)
        # keeps the round trip through asymmetric_dequantize exact.
        scale = abs(alpha) if alpha != 0 else 1.0
    # Cast to int so the zero point matches its annotation (no 58.0 / -0.0)
    zero = int(-1 * np.round(beta / scale))
    # Quantize the parameters
    quantized = clamp(np.round(params / scale + zero), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale, zero

def asymmetric_dequantize(params_q: np.array, scale: float, zero: int) -> np.array:
    """Map asymmetrically quantized integers back to real values.

    The zero point is subtracted first, then the result is multiplied by the
    scale.
    """
    centered = params_q - zero
    restored = centered * scale
    return restored

# Quantize the parameters to 8 bits with the asymmetric scheme
asymmetric_q, asymmetric_scale, asymmetric_zero = asymmetric_quantization(params, 8)

# Show the original values, a blank line, then the quantization result
print('Original:', np.round(params, 2), '', sep='\n')
print(f'Asymmetric scale: {asymmetric_scale}, zero: {asymmetric_zero}')
print(asymmetric_q)

Original:
[144.98 -43.02   0.   -15.28  28.86 127.93  29.88 -42.02  96.57 103.79
  -6.5  106.55  -4.28  -5.7  -19.36  -1.78  73.15 -23.7  -21.38 143.98]

Asymmetric scale: 0.7372549019607844, zero: 58.0
[255   0  58  37  97 232  99   1 189 199  49 203  52  50  32  56 157  26
  29 253]


In [None]:
def symmetric_quantization(params: np.ndarray, bits: int) -> tuple[np.ndarray, float]:
    """Quantize ``params`` to signed ``bits``-bit integers using only a scale.

    The largest absolute value in ``params`` is mapped to
    ``2**(bits - 1) - 1``; zero always maps to zero.

    Args:
        params: Real-valued parameters to quantize (must be non-empty).
        bits: Number of bits of the target integer range.

    Returns:
        A tuple ``(quantized, scale)`` where ``quantized`` is an int32 array
        with values in [-2**(bits - 1), 2**(bits - 1) - 1] and ``scale`` is
        the real-valued step between two adjacent integer levels.
    """
    # Calculate the scale
    alpha = np.max(np.abs(params))
    lower_bound = -2**(bits - 1)
    upper_bound = 2**(bits - 1) - 1
    scale = alpha / upper_bound
    if scale == 0:
        # All-zero input: 0 / 0 would produce NaN. Any positive step
        # represents zeros exactly, so fall back to 1.0.
        scale = 1.0
    # Quantize the parameters
    quantized = clamp(np.round(params / scale), lower_bound, upper_bound).astype(np.int32)
    return quantized, scale

def symmetric_dequantize(params_q: np.array, scale: float) -> np.array:
    """Map symmetrically quantized integers back to real values.

    Each quantized value is multiplied by the scale; no zero point is involved.
    """
    restored = params_q * scale
    return restored

# Quantize the parameters to 8 bits with the symmetric scheme
symmetric_q, symmetric_scale = symmetric_quantization(params, 8)

# Show the original values, a blank line, then the quantization result
print('Original:', np.round(params, 2), '', sep='\n')
print(f'Symmetric scale: {symmetric_scale}')
print(symmetric_q)

Original:
[144.98 -43.02   0.   -15.28  28.86 127.93  29.88 -42.02  96.57 103.79
  -6.5  106.55  -4.28  -5.7  -19.36  -1.78  73.15 -23.7  -21.38 143.98]

Symmetric scale: 1.1415748031496062
[127 -38   0 -13  25 112  26 -37  85  91  -6  93  -4  -5 -17  -2  64 -21
 -19 126]


In [None]:
def quantization_error(params: np.array, params_q_dq: np.array):
    """Return the mean squared error between the original and the dequantized parameters."""
    residual = params - params_q_dq
    squared = np.square(residual)
    return np.mean(squared)

In [None]:
# Map both quantized tensors back to floating-point values
params_deq_asymmetric = asymmetric_dequantize(asymmetric_q, asymmetric_scale, asymmetric_zero)
params_deq_symmetric = symmetric_dequantize(symmetric_q, symmetric_scale)

# Print each tensor under its heading, with a blank line between sections
sections = (
    ('Original:', params),
    ('Dequantize Asymmetric:', params_deq_asymmetric),
    ('Dequantize Symmetric:', params_deq_symmetric),
)
for position, (heading, values) in enumerate(sections):
    if position:
        print()
    print(heading)
    print(np.round(values, 2))

Original:
[144.98 -43.02   0.   -15.28  28.86 127.93  29.88 -42.02  96.57 103.79
  -6.5  106.55  -4.28  -5.7  -19.36  -1.78  73.15 -23.7  -21.38 143.98]

Dequantize Asymmetric:
[145.24 -42.76   0.   -15.48  28.75 128.28  30.23 -42.02  96.58 103.95
  -6.64 106.9   -4.42  -5.9  -19.17  -1.47  72.99 -23.59 -21.38 143.76]

Dequantize Symmetric:
[144.98 -43.38   0.   -14.84  28.54 127.86  29.68 -42.24  97.03 103.88
  -6.85 106.17  -4.57  -5.71 -19.41  -2.28  73.06 -23.97 -21.69 143.84]


In [None]:
# Report the MSE of each scheme, with labels right-aligned in a 20-character column
reports = (
    ("Asymmetric error: ", params_deq_asymmetric),
    ("Symmetric error: ", params_deq_symmetric),
)
for label, restored in reports:
    mse = np.round(quantization_error(params, restored), 2)
    print(f'{label:>20}{mse}')

  Asymmetric error: 0.04
   Symmetric error: 0.08


# Post-Training Quantization (PTQ)

In [None]:
# Post-Training Quantization (PTQ) Example using PyTorch and inbuilt image
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import FakeData
from torch.utils.data import DataLoader
import torch.nn.functional as F

# Load a pretrained model and switch it to inference mode.
# The explicit `weights=` enum replaces the deprecated `pretrained=True` flag;
# IMAGENET1K_V1 is the checkpoint that flag selected.
model_fp32 = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1).eval()

# Apply dynamic quantization to linear layers
# NOTE(review): recent PyTorch releases print a migration notice for this call
# that recommends torchao's `quantize_` API; the call is kept because torchao
# is not a dependency of this notebook.
model_ptq = torch.quantization.quantize_dynamic(model_fp32, {torch.nn.Linear}, dtype=torch.qint8)

# Create fake image data (inbuilt dataset)
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
fake_dataset = FakeData(size=1, image_size=(3, 224, 224), transform=transform)
fake_loader = DataLoader(fake_dataset, batch_size=1)

# Get one sample (the label is not needed)
input_tensor, _ = next(iter(fake_loader))

# Inference without gradient tracking
with torch.no_grad():
    output_fp32 = model_fp32(input_tensor)
    output_ptq = model_ptq(input_tensor)

print("Original Model Prediction (top-1 index):", torch.argmax(output_fp32, dim=1).item())
print("PTQ Model Prediction (top-1 index):", torch.argmax(output_ptq, dim=1).item())

# Calculate quantization error (using Mean Squared Error).
# The result gets its own name: binding it to `quantization_error` would
# replace the MSE helper function defined earlier in this notebook and break
# any later re-run of the cells that call it.
ptq_output_mse = F.mse_loss(output_fp32, output_ptq)
print(f"\nQuantization Error (MSE): {ptq_output_mse.item():.4f}")

Original Model Prediction (top-1 index): 644
PTQ Model Prediction (top-1 index): 644

Quantization Error (MSE): 0.0003


For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  model_ptq = torch.quantization.quantize_dynamic(model_fp32, {torch.nn.Linear}, dtype=torch.qint8)
