# L3-A - Linear Quantization II: Symmetric vs. Asymmetric Mode 



There are two modes in linear quantization: 
1. Asymmetric: We map $[r_{\text{min}}, r_{\text{max}}] \to [q_{\text{min}}, q_{\text{max}}]$.
   This is what we implemented in the previous lesson.

2. Symmetric: We map $[-r_{\text{max}}, r_{\text{max}}] \to [-q_{\text{max}}, q_{\text{max}}]$

   where we can set $r_{\text{max}} = \max(|r_{\text{tensor}}|)$, the largest absolute value in the tensor.

   We don't need a zero point ($z = 0$).

   This is because the floating-point range and the quantized range are both symmetric with respect to zero.

   Hence, we can simplify the equations to:

$$
 \begin{cases}
 q = \text{int}(\text{round}(r / s))\\
 s = r_{\text{max}} / q_{\text{max}}
 \end{cases}
$$

In [1]:
!pip install numpy



In [3]:
!pip install torch



In [9]:
import numpy as np

In [10]:
import torch

In [14]:
def get_q_scale_symmetric(tensor, dtype=torch.int8):
    """Return the scale for symmetric linear quantization of ``tensor``.

    The scale is ``r_max / q_max`` where ``r_max`` is the largest absolute
    value in ``tensor`` and ``q_max`` is the largest value representable by
    the integer ``dtype``.

    Args:
        tensor: Floating-point ``torch.Tensor`` to be quantized.
        dtype: Target integer dtype (default ``torch.int8``).

    Returns:
        The scale as a Python float. For an all-zero tensor, 1.0 is returned
        instead of 0.0.
    """
    r_max = tensor.abs().max().item()
    q_max = torch.iinfo(dtype).max

    if r_max == 0:
        # A zero scale would make the later ``tensor / scale`` evaluate 0 / 0
        # and produce NaN. Any positive scale represents an all-zero tensor
        # exactly, so fall back to 1.0.
        return 1.0

    return r_max / q_max

In [15]:
# Random 4x4 tensor (torch.randn samples) used as the running example below.
test_tensor = torch.randn((4,4))

In [16]:
test_tensor

tensor([[ 0.4157, -1.7389, -1.4991, -0.1394],
        [ 1.2122, -0.0865,  0.7849, -0.9915],
        [-0.2894, -0.8335, -0.0195, -0.1266],
        [ 0.3407,  0.6924, -0.0360,  2.1102]])

In [17]:
# Symmetric scale for the test tensor with the default int8 target dtype.
s = get_q_scale_symmetric(test_tensor)

In [18]:
s

0.016615937075277015

In [19]:
def linear_q_with_scale_and_zero_point(
    tensor, scale, zero_point, dtype=torch.int8):
    """Linearly quantize ``tensor`` with the given scale and zero point.

    Computes ``round(tensor / scale + zero_point)``, clamps the result to the
    representable range of ``dtype``, and casts it to ``dtype``.

    Args:
        tensor: Floating-point ``torch.Tensor`` to quantize.
        scale: Quantization scale.
        zero_point: Quantization zero point.
        dtype: Target integer dtype (default ``torch.int8``).

    Returns:
        The quantized tensor with dtype ``dtype``.
    """
    # torch.iinfo exposes the minimum and maximum values of the integer dtype.
    limits = torch.iinfo(dtype)

    shifted = tensor / scale + zero_point
    quantized = torch.round(shifted)

    # Final step: keep the rounded values within [q_min, q_max], then convert
    # to the requested quantized data type.
    quantized = torch.clamp(quantized, limits.min, limits.max)
    return quantized.to(dtype)

In [20]:
def linear_dequantization(quantized_tensor, scale, zero_point):
    """Map a quantized tensor back to floating point.

    Computes ``scale * (q - zero_point)`` after converting ``q`` to float.

    Args:
        quantized_tensor: Quantized ``torch.Tensor``.
        scale: Scale that was used for quantization.
        zero_point: Zero point that was used for quantization.

    Returns:
        The dequantized floating-point tensor.
    """
    # Convert to float first so the subtraction is not done in the integer dtype.
    centered = quantized_tensor.float() - zero_point
    return scale * centered

In [21]:
def linear_q_symmetric(tensor, dtype = torch.int8):
    """Quantize ``tensor`` in symmetric mode.

    The scale is obtained from ``get_q_scale_symmetric`` and the zero point
    is fixed at 0.

    Args:
        tensor: Floating-point ``torch.Tensor`` to quantize.
        dtype: Target integer dtype (default ``torch.int8``).

    Returns:
        A ``(quantized_tensor, scale)`` tuple.
    """
    sym_scale = get_q_scale_symmetric(tensor, dtype)
    # Symmetric mode uses no offset, so the zero point is always 0.
    q = linear_q_with_scale_and_zero_point(
        tensor, scale=sym_scale, zero_point=0, dtype=dtype)
    return q, sym_scale

In [None]:
# quantized_tensor, scale = linear_q_symmetric(test_tensor, dtype=torch.int8)

In [30]:
# Quantize the test tensor to int8 in symmetric mode; also keep the scale used.
quantized_tensor, scale = linear_q_symmetric(test_tensor, dtype = torch.int8)

In [31]:
# Map the quantized values back to floats; the zero point is 0 in symmetric mode.
dequantized_tensor = linear_dequantization(quantized_tensor, scale, zero_point=0)

In [32]:
dequantized_tensor

tensor([[ 0.4154, -1.7447, -1.4954, -0.1329],
        [ 1.2130, -0.0831,  0.7809, -0.9970],
        [-0.2825, -0.8308, -0.0166, -0.1329],
        [ 0.3489,  0.6979, -0.0332,  2.1102]])

In [33]:
def quantization_error(original_tensor, dequantized_tensor, error_type="mse"):
    """Compute the quantization error between two tensors.

    Works with either PyTorch tensors or NumPy arrays. Both inputs must come
    from the same framework; subclasses are accepted, so a
    ``torch.nn.Parameter`` can be compared against a plain ``torch.Tensor``.

    Args:
        original_tensor: Original floating-point tensor
            (``torch.Tensor`` or ``np.ndarray``).
        dequantized_tensor: Reconstructed tensor after dequantization; same
            framework and shape as ``original_tensor``.
        error_type: ``"mse"`` (default) for mean squared error or ``"mae"``
            for mean absolute error.

    Returns:
        The error as a Python float.

    Raises:
        TypeError: If the inputs come from different frameworks, or are
            neither PyTorch tensors nor NumPy arrays.
        ValueError: If the shapes differ or ``error_type`` is not recognised.
    """

    def _framework_of(value):
        # Identify the backing library of ``value``; None if unsupported.
        if isinstance(value, torch.Tensor):
            return torch
        if isinstance(value, np.ndarray):
            return np
        return None

    lib = _framework_of(original_tensor)

    # Validate the types before touching ``.shape`` so unsupported inputs
    # (lists, scalars, ...) get a TypeError instead of an AttributeError.
    # isinstance-based detection also lets Tensor subclasses such as
    # torch.nn.Parameter be paired with plain tensors.
    if lib is not _framework_of(dequantized_tensor):
        raise TypeError("原始张量与反量化张量类型必须一致")
    if lib is None:
        raise TypeError("仅支持 PyTorch 或 NumPy 张量")

    # Shapes must match exactly.
    if original_tensor.shape != dequantized_tensor.shape:
        raise ValueError("张量形状不匹配")

    # Compute the error.
    diff = original_tensor - dequantized_tensor
    if error_type == "mse":
        error = lib.mean(diff ** 2)
    elif error_type == "mae":
        error = lib.mean(lib.abs(diff))
    else:
        raise ValueError("error_type 必须为 'mse' 或 'mae'")

    # Return a plain Python scalar.
    return error.item() if lib is torch else float(error)

In [34]:
# Error between the original and dequantized tensors (default error_type "mse").
e = quantization_error(test_tensor, dequantized_tensor)

In [35]:
e

2.2271613488555886e-05