# Quantization Demo

Reference: [ChatGPT](https://chat.openai.com/share/ed886a22-1480-45a5-ada7-e4f1272a2482)

In [None]:
import numpy as np

## Int8 Quantization

In [None]:
def quantize_fp16_to_int8(fp16_weights):
    """Quantize weights to INT8 with symmetric abs-max scaling.

    The scale is ``127 / max(|w|)``, so the largest-magnitude weight maps
    to +/-127 and every other weight is rounded to the nearest integer.

    Args:
        fp16_weights: Array-like of weights (FP16 in this demo).

    Returns:
        A ``(int8_weights, scale)`` tuple: the quantized ``np.int8`` array
        and the scale as a Python ``float``. Dividing the integers by
        ``scale`` reconstructs an approximation of the input.
    """
    # Work in float32: float16 tops out at 65504, so computing
    # 127 / abs_max in float16 overflows to inf for small weights.
    weights = np.asarray(fp16_weights, dtype=np.float32)

    # Largest magnitude in the tensor (0.0 for an empty tensor, where
    # np.max would otherwise raise).
    abs_max_val = float(np.max(np.abs(weights))) if weights.size else 0.0

    # All-zero (or empty) input: there is nothing to scale, and dividing by
    # zero would yield an infinite scale and NaN products. Use a neutral
    # scale so dequantization gives zeros back.
    if abs_max_val == 0.0:
        return np.zeros(weights.shape, dtype=np.int8), 1.0

    # Scaling factor; kept as a plain Python float so it does not impose
    # its own dtype on later array arithmetic.
    scale = 127.0 / abs_max_val

    # Round to the nearest integer and clamp before the cast so that a
    # float rounding artefact can never wrap around in int8.
    scaled = np.round(weights * scale)
    int8_weights = np.clip(scaled, -127, 127).astype(np.int8)

    return int8_weights, scale


def dequantize_int8_to_fp16(int8_weights, scale):
    """Reconstruct FP16 weights from INT8 values and their scale.

    Args:
        int8_weights: Array-like of quantized integer weights.
        scale: The scaling factor the weights were multiplied by during
            quantization.

    Returns:
        An ``np.float16`` array holding ``int8_weights / scale``.
    """
    # Divide in float32 so a scale larger than the float16 maximum (65504)
    # is not turned into inf, which would collapse every weight to zero.
    restored = np.asarray(int8_weights, dtype=np.float32) / np.float32(scale)

    # Cast explicitly so the result is FP16 whatever type `scale` has.
    return restored.astype(np.float16)

In [None]:
# Start from a small hand-written set of FP16 weights
w_fp16 = np.array([1.0, -0.5, 0.25, -0.125], dtype=np.float16)
print(f"Original FP16 Weights: {w_fp16}")

# Quantize the FP16 weights down to INT8
w_int8, sf = quantize_fp16_to_int8(w_fp16)
print(f"Int8 Weights: {w_int8}")
print(f"Quantization Constant: {sf}")

# Rebuild FP16 weights from the INT8 values
rw_fp16 = dequantize_int8_to_fp16(w_int8, sf)
print(f"Reconstructed FP16 Weights: {rw_fp16}")

# Compare the output of the original and the reconstructed weights
x = np.array([1.0, 0.25, -0.5, -1.0])
y = x @ w_fp16
qy = x @ rw_fp16
loss = y - qy

print(f"Original Output: {y}")
print(f"Quantized Output: {qy}")
print(f"Output Loss: {loss}")

Original FP16 Weights: [ 1.    -0.5    0.25  -0.125]
Int8 Weights: [127 -64  32 -16]
Quantization Constant: 127.0
Reconstructed FP16 Weights: [ 1.    -0.504  0.252 -0.126]
Original Output: 0.875
Quantized Output: 0.8740234375
Output Loss: 0.0009765625


## Int4 Quantization

In [None]:
def quantize_fp16_to_int4(fp16_weights):
    """Quantize weights to the INT4 range with symmetric abs-max scaling.

    The scale is ``7 / max(|w|)``, so the largest-magnitude weight maps to
    +/-7. NumPy has no 4-bit integer type, so the values are stored in an
    ``np.int8`` array restricted to the INT4 range [-8, 7].

    Args:
        fp16_weights: Array-like of weights (FP16 in this demo).

    Returns:
        A ``(int4_weights, scale)`` tuple: the quantized values as an
        ``np.int8`` array and the scale as a Python ``float``. Dividing the
        integers by ``scale`` reconstructs an approximation of the input.
    """
    # Work in float32: float16 tops out at 65504, so computing
    # 7 / abs_max in float16 overflows to inf for small weights.
    weights = np.asarray(fp16_weights, dtype=np.float32)

    # Largest magnitude in the tensor (0.0 for an empty tensor, where
    # np.max would otherwise raise).
    abs_max_val = float(np.max(np.abs(weights))) if weights.size else 0.0

    # All-zero (or empty) input: avoid dividing by zero and return zeros
    # with a neutral scale so dequantization gives zeros back.
    if abs_max_val == 0.0:
        return np.zeros(weights.shape, dtype=np.int8), 1.0

    # Scaling factor; kept as a plain Python float so it does not impose
    # its own dtype on later array arithmetic.
    scale = 7.0 / abs_max_val

    # Round, then clamp to the INT4 range *before* the integer cast so an
    # out-of-range value can never wrap around.
    scaled = np.round(weights * scale)
    int4_weights = np.clip(scaled, -8, 7).astype(np.int8)

    return int4_weights, scale


def dequantize_int4_to_fp16(int4_weights, scale):
    """Reconstruct FP16 weights from INT4 values and their scale.

    Args:
        int4_weights: Array-like of quantized integer weights.
        scale: The scaling factor the weights were multiplied by during
            quantization.

    Returns:
        An ``np.float16`` array holding ``int4_weights / scale``.
    """
    # Divide in float32 so a scale larger than the float16 maximum (65504)
    # is not turned into inf, which would collapse every weight to zero.
    restored = np.asarray(int4_weights, dtype=np.float32) / np.float32(scale)

    # Cast explicitly so the result is FP16 whatever type `scale` has.
    return restored.astype(np.float16)

In [None]:
# Start from a small hand-written set of FP16 weights
w_fp16 = np.array([1.0, -0.5, 0.25, -0.125], dtype=np.float16)
print(f"Original FP16 Weights: {w_fp16}")

# Quantize the FP16 weights down to INT4
w_int4, sf = quantize_fp16_to_int4(w_fp16)
# Label fixed: these are the INT4 values (the label previously said "Int8")
print(f"Int4 Weights: {w_int4}")
print(f"Quantization Constant: {sf}")

# Dequantize the INT4 values back to FP16
rw_fp16 = dequantize_int4_to_fp16(w_int4, sf)
print(f"Reconstructed FP16 Weights: {rw_fp16}")

# Compare the output of the original and the reconstructed weights
x = np.array([1.0, 0.25, -0.5, -1.0])
y = np.matmul(x, w_fp16)
qy = np.matmul(x, rw_fp16)
loss = y - qy

print(f"Original Output: {y}")
print(f"Quantized Output: {qy}")
print(f"Output Loss: {loss}")

Original FP16 Weights: [ 1.    -0.5    0.25  -0.125]
Int8 Weights: [ 7 -4  2 -1]
Quantization Constant: 7.0
Reconstructed FP16 Weights: [ 1.     -0.5713  0.2856 -0.1428]
Original Output: 0.875
Quantized Output: 0.857177734375
Output Loss: 0.017822265625


## 實際運算

In [None]:
# Seed NumPy's global random generator so the results are reproducible
np.random.seed(2135)

# Number of input samples, hidden-layer size and number of output classes
inn_size, hid_size, out_size = 12, 512, 4096

# Draw the model input, the hidden-layer weights and the classifier
# weights (kept in this order so the seeded values stay the same)
x = np.random.randn(inn_size, hid_size).astype(np.float16)
w_fp16 = np.random.randn(hid_size, hid_size).astype(np.float16)
clf = np.random.randn(hid_size, out_size).astype(np.float16)

# Hidden-layer output
hid = x @ w_fp16
# Classifier output
out = hid @ clf
# Predicted class for each sample
y = out.argmax(axis=-1)

In [None]:
# Quantize the hidden-layer weights to INT8 and run the forward pass again
w_int8, sf = quantize_fp16_to_int8(w_fp16)
rw_fp16 = dequantize_int8_to_fp16(w_int8, sf)
hid = x @ rw_fp16
out = hid @ clf
y_int8 = out.argmax(axis=-1)

# Count how many INT8 predictions differ from the original FP16 predictions
errors_int8 = (y != y_int8).sum()

print(f"FP16 Prediction: {y}")
print(f"INT8 Prediction: {y_int8}")
print(f"INT8 Error: {errors_int8}, Results: {y == y_int8}")

FP16 Prediction: [3956  874  201  109  843   46 1419 1058  865 2894 2059 1386]
INT8 Prediction: [3956  874  201  109  843   46 1419 1058 1495 2894 2059 1386]
INT8 Error: 1, Results: [ True  True  True  True  True  True  True  True False  True  True  True]


In [None]:
# Quantize the hidden-layer weights to INT4 and run the forward pass again
w_int4, sf = quantize_fp16_to_int4(w_fp16)
rw_fp16 = dequantize_int4_to_fp16(w_int4, sf)
hid = x @ rw_fp16
out = hid @ clf
y_int4 = out.argmax(axis=-1)

# Count how many INT4 predictions differ from the original FP16 predictions
errors_int4 = (y != y_int4).sum()

print(f"FP16 Prediction: {y}")
print(f"INT4 Prediction: {y_int4}")
print(f"INT4 Error: {errors_int4}, Results: {y == y_int4}")

FP16 Prediction: [3956  874  201  109  843   46 1419 1058  865 2894 2059 1386]
INT4 Prediction: [3956 3782  242 1582  514 1716 2584 1058  865 2436 2059 1386]
INT4 Error: 7, Results: [ True False False False False False False  True  True False  True  True]


## 實驗結果

INT8 測試 12 個樣本只錯了 1 個

但 INT4 錯了 7 個，誤差很大！