<a href="https://colab.research.google.com/github/AlphardXyl/Algorithms-of-Foundation-Models/blob/main/Quantization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random
import time
import math

In [2]:
class Matrix:
    """Affine (scale + zero-point) quantizer for a single matrix.

    Quantization maps each value ``x`` to
    ``round(clip(x / scale + zero_point, q_min, q_max))`` and dequantization
    maps a quantized value ``q`` back to ``scale * (q - zero_point)``.
    Quantized values are integer-valued but stored as floats.
    """

    q_min = None        # smallest value of the quantized range
    q_max = None        # largest value of the quantized range
    d_min = None        # minimum of the input matrix
    d_max = None        # maximum of the input matrix
    d_matrix = None     # matrix before quantization
    q_matrix = None     # integer-valued matrix after quantization
    de_matrix = None    # floating-point matrix after dequantization
    scale = None        # scale factor
    zero_point = None   # offset (zero point)

    def __init__(self,
                 d_matrix,          # input matrix to be quantized
                 bits,              # bit width of the quantized values
                 unsigned=True      # unsigned integers by default
                 ):
        """Compute the quantization parameters for ``d_matrix``.

        Args:
            d_matrix: Array-like of numbers to quantize (must be non-empty;
                ``np.min`` raises ``ValueError`` on empty input).
            bits: Bit width of the quantized representation.
            unsigned: If true the quantized range is ``[0, 2**bits - 1]``,
                otherwise ``[-2**(bits-1), 2**(bits-1) - 1]``.
        """
        # Coerce so that lists work too; an ndarray is kept as the same object.
        self.d_matrix = np.asarray(d_matrix)
        # Minimum and maximum of the floating-point input.
        self.d_min = np.min(self.d_matrix)
        self.d_max = np.max(self.d_matrix)

        self.bits = bits
        self.unsigned = unsigned
        # Bounds of the quantized representation.
        # Signed quantization suits data roughly symmetric around zero;
        # unsigned quantization suits one-tailed data such as ReLU activations.
        if self.unsigned:
            self.q_min = 0
            self.q_max = math.pow(2, self.bits) - 1
        else:
            self.q_min = -math.pow(2, self.bits - 1)
            self.q_max = math.pow(2, self.bits - 1) - 1

        # A constant matrix has zero spread, which would make the scale 0 and
        # turn every later division into inf/NaN.  In that case only, widen
        # the range so that it also contains 0; the constant then lands on an
        # end of the quantized range and round-trips exactly.
        lo, hi = self.d_min, self.d_max
        if hi == lo:
            lo, hi = min(lo, 0.0), max(hi, 0.0)
        span = hi - lo

        # Scale factor; an all-zero matrix still has span 0, so fall back to 1.
        self.scale = span / (self.q_max - self.q_min) if span != 0 else 1.0
        # Offset chosen so that real 0 is quantized without error, which keeps
        # operations such as zero padding or ReLU free of quantization error.
        self.zero_point = np.round(self.q_min - (lo / self.scale))

    # Quantize the stored matrix.
    def quant(self):
        """Quantize ``d_matrix``, store the result in ``q_matrix`` and return it."""
        shifted = self.d_matrix / self.scale + self.zero_point
        self.q_matrix = np.round(np.clip(shifted, self.q_min, self.q_max))
        return self.q_matrix

    # Dequantize the stored quantized matrix.
    def dequant(self):
        """Dequantize ``q_matrix``, store the result in ``de_matrix`` and return it.

        If ``quant()`` has not been called yet it is run first, instead of
        failing on the missing ``q_matrix``.
        """
        if self.q_matrix is None:
            self.quant()
        self.de_matrix = self.scale * (self.q_matrix - self.zero_point)
        return self.de_matrix


In [3]:
# Two independent 5x5 test matrices with entries drawn from np.random.rand,
# printed one after the other.
A = np.random.rand(5, 5)
B = np.random.rand(5, 5)
print(A, B, sep="\n")

[[0.73240079 0.89004225 0.48354005 0.26454259 0.29483055]
 [0.69249232 0.00470905 0.94349743 0.15928171 0.06216644]
 [0.96357621 0.04829028 0.4329498  0.93150658 0.94311457]
 [0.0547627  0.26454855 0.66332886 0.50795928 0.66512683]
 [0.88365271 0.74941684 0.25898767 0.94220583 0.44689147]]
[[0.57643921 0.67052365 0.4356318  0.97408749 0.77778911]
 [0.59634418 0.44019563 0.91910832 0.27502851 0.41685161]
 [0.37599485 0.61869722 0.40745316 0.83529881 0.09033823]
 [0.48548435 0.44496265 0.6298603  0.39900219 0.08523221]
 [0.43778719 0.09340143 0.6268506  0.35819252 0.45442867]]


In [4]:
# Build a quantizer for A with a bit width of 8 (unsigned by default).
Am = Matrix(A, 8)
# Bare trailing expression: the notebook displays the returned quantized matrix.
Am.quant()

array([[194., 236., 128.,  69.,  77.],
       [183.,   0., 250.,  41.,  16.],
       [255.,  12., 114., 247., 250.],
       [ 14.,  69., 175., 134., 176.],
       [234., 198.,  68., 250., 118.]])

In [5]:
def quant(Mat, bits=8):
    """Quantize ``Mat`` to unsigned ``bits``-bit levels.

    Args:
        Mat: Array-like of numbers (non-empty).
        bits: Bit width of the target range ``[0, 2**bits - 1]``; the default
            of 8 gives the range ``[0, 255]``.

    Returns:
        Array of integer-valued floats in ``[0, 2**bits - 1]``.  A constant
        matrix has zero spread and is mapped to all zeros instead of NaN.
    """
    q_max = 2 ** bits - 1
    lowest = np.min(Mat)
    Mat_scale = (np.max(Mat) - lowest) / q_max
    if Mat_scale == 0:
        # Constant input: dividing by the zero scale would produce inf/NaN.
        # Every entry equals the minimum, i.e. the bottom of the range.
        return np.zeros(np.shape(Mat), dtype=float)
    Mat_offset = np.round(lowest / Mat_scale)
    Mat_int = np.clip(np.round(Mat / Mat_scale - Mat_offset), 0, q_max)
    return Mat_int

def dequant(Mat_int, Mat, bits=8):
    """Map quantized values back to floats using parameters derived from ``Mat``.

    The scale and offset are recomputed from the original matrix ``Mat``
    with the same formulas used for quantization.

    Args:
        Mat_int: Quantized values to convert back.
        Mat: The original (unquantized) matrix the parameters come from.
        bits: Bit width of the quantized range; the default of 8 gives a
            divisor of 255.

    Returns:
        Array ``(Mat_int + offset) * scale``.  When ``Mat`` is constant the
        scale is 0, so the constant itself is returned for every entry
        instead of NaN.
    """
    lowest = np.min(Mat)
    Mat_scale = (np.max(Mat) - lowest) / (2 ** bits - 1)
    if Mat_scale == 0:
        # Constant source matrix: the offset would be min / 0.  The only
        # value the original matrix holds is its minimum.
        return np.full(np.shape(Mat_int), lowest, dtype=float)
    Mat_offset = np.round(lowest / Mat_scale)
    Mat_deq = (Mat_int + Mat_offset) * Mat_scale
    return Mat_deq

def quant_error(Mat):
    """Return the quantization round-trip error of ``Mat``.

    ``Mat`` is quantized with ``quant`` and restored with ``dequant``; the
    result is the sum over all entries of half the squared difference
    between the original and the restored values.
    """
    Mat_int = quant(Mat)
    Mat_deq = dequant(Mat_int, Mat)
    error = np.sum((Mat - Mat_deq)**2 / 2)
    # The original computed the error but fell off the end, returning None.
    return error

In [6]:
def quant_multiply(M1, M2, bit):
    """Approximate ``M1 @ M2`` through quantized operands.

    Both inputs are wrapped in ``Matrix`` quantizers of width ``bit``.  Each
    quantized matrix has its zero point subtracted, the two results are
    matrix-multiplied, and the product is rescaled by the product of the
    two scale factors.
    """
    left, right = Matrix(M1, bit), Matrix(M2, bit)
    combined_scale = left.scale * right.scale
    left_centered = left.quant() - left.zero_point
    right_centered = right.quant() - right.zero_point
    return combined_scale * (left_centered @ right_centered)

In [7]:
# Manual quantization of A and B to the range [0, 255]: the scale spreads
# each matrix's (max - min) span over 255 steps, and the offset is the
# rounded position of the minimum in units of that scale.
A_scale = (A.max() - A.min()) / 255
A_offset = np.round(A.min() / A_scale)
A_int = np.clip(np.round(A / A_scale - A_offset), 0, 255)

B_scale = (B.max() - B.min()) / 255
B_offset = np.round(B.min() / B_scale)
B_int = np.clip(np.round(B / B_scale - B_offset), 0, 255)

print(A_int, B_int, sep="\n")

[[194. 236. 128.  69.  77.]
 [183.   0. 250.  41.  16.]
 [255.  12. 114. 247. 250.]
 [ 14.  69. 175. 134. 176.]
 [234. 198.  68. 250. 118.]]
[[141. 168. 101. 255. 199.]
 [147. 102. 240.  55.  96.]
 [ 84. 153.  93. 216.   2.]
 [115. 104. 157.  90.   0.]
 [102.   3. 156.  79. 106.]]


In [8]:
# Dequantize: add the offset back and rescale to the floating-point range.
A_deq = A_scale * (A_int + A_offset)
B_deq = B_scale * (B_int + B_offset)
print(A_deq, B_deq, sep="\n")

[[0.73325136 0.89118242 0.48507397 0.26321844 0.29330054]
 [0.69188846 0.00376026 0.94382611 0.15793106 0.06392448]
 [0.96262742 0.04888342 0.43243029 0.93254532 0.94382611]
 [0.05640395 0.26321844 0.66180635 0.50763555 0.66556662]
 [0.88366189 0.74829241 0.25945817 0.94382611 0.44747134]]
[[0.57514166 0.66925574 0.43571338 0.97251225 0.77731266]
 [0.5960559  0.43919908 0.92022665 0.27537085 0.41828484]
 [0.37645636 0.61697014 0.40782772 0.83656968 0.09062838]
 [0.48451327 0.4461705  0.63091297 0.3973706  0.08365697]
 [0.43919908 0.09411409 0.62742726 0.35902782 0.45314191]]


In [9]:
# Round-trip error of each matrix: half the sum of squared differences
# between the original and its dequantized reconstruction.
A_error = np.sum(np.square(A - A_deq)) / 2
B_error = np.sum(np.square(B - B_deq)) / 2
print(A_error, B_error, sep="\n")

1.4696072572351001e-05
1.4690358065327065e-05


In [10]:
# Compare three versions of the product A @ B:
#   T  - quantized multiplication via quant_multiply (8 bits)
#   T1 - exact floating-point product
#   T2 - the same quantized product assembled from the manual parameters
T = quant_multiply(A, B, 8)
T1 = A @ B
T2 = (A_scale * B_scale) * ((A_int + A_offset) @ (B_int + B_offset))
print(T, T1, T2, sep="\n")

# Half the sum of squared differences between quantized and exact products.
Mult_error = np.sum(np.square(T - T1)) / 2
print(Mult_error)

[[1.39187727 1.32645936 1.68749653 1.5741981  1.14162195]
 [0.86007981 1.12349473 0.82959251 1.5491896  0.66710285]
 [1.61193366 1.4374116  1.82130769 2.02081202 1.31360122]
 [0.97674678 0.85079993 1.27456804 1.12165972 0.55798561]
 [1.70575483 1.5433438  2.05566401 1.81818818 1.30512087]]
[[1.39226895 1.32729882 1.68556182 1.57326904 1.14087806]
 [0.86128257 1.12682547 0.82972511 1.54976682 0.667636  ]
 [1.61214298 1.43779653 1.81846615 2.02302095 1.31667355]
 [0.97652788 0.85171923 1.27415913 1.12110018 0.55834247]
 [1.70673002 1.54362171 2.05286005 1.81921351 1.30647414]]
[[1.39187727 1.32645936 1.68749653 1.5741981  1.14162195]
 [0.86007981 1.12349473 0.82959251 1.5491896  0.66710285]
 [1.61193366 1.4374116  1.82130769 2.02081202 1.31360122]
 [0.97674678 0.85079993 1.27456804 1.12165972 0.55798561]
 [1.70575483 1.5433438  2.05566401 1.81818818 1.30512087]]
2.7525905605032084e-05


In [51]:
# Read the whole file as a flat array of float32 values.
# NOTE(review): np.fromfile applies no format checks; this assumes the file
# is headerless raw float32 data - confirm against how the file was written.
test = np.fromfile('Inception_v3.bin', dtype = np.float32)
# Bare trailing expression: the notebook displays the number of elements read.
test.shape

(23851520,)

In [36]:
# Read the whole quantized file as a flat array of int8 values.
# NOTE(review): the recorded element count for this file (23951360) differs
# from the float32 load of the unquantized file (23851520), so the two arrays
# are presumably not element-for-element aligned - confirm the file layout.
test_q = np.fromfile('Inception_v3_quantized.bin', dtype = np.int8)
# Bare trailing expression: the notebook displays the number of elements read.
test_q.shape

(23951360,)

In [46]:
# Largest value in the loaded float32 array (displayed by the notebook).
# NOTE(review): the recorded result (~1.46e+34) looks implausibly large for
# model weights; the file may contain non-float32 bytes - confirm.
np.max(test)

1.4552433e+34