In [26]:
import numpy as np
import os
import sys
import copy

In [27]:
def histgram_range(x):
    hist, range = np.histogram(x, 100)
    total = len(x)
    left = 0
    right = len(hist) - 1
    limit = 0.99
    while True:
        cover_percent = hist[left:right].sum()/total
        if cover_percent <= limit:
            break
            
        if hist[left] > hist[right]:
            right -= 1
        else:
            left += 1
            
    left_val = range[left]
    right_val = range[right]
    dynamic_range = max(abs(left_val), abs(right_val))
    return dynamic_range/127.

In [46]:
##### KL 散度校准
def KL(p, q):
    pk = 1.0 * p / np.sum(p)
    qk = 1.0 * q / np.sum(q)
    t = 0
    for i in range(pk.shape[0]):
        t += pk[i] * np.log(pk[i]) - pk[i] * np.log(qk[i])
        
    return t
    
def entropy(x, target_bin = 128):
    # 计算最大绝对值
    amax = np.abs(x).max()
    # 计算直方图分布
    distribution, _ = np.histogram(x, bins=2048, range=(0, amax))
    # 遍历直方图分布
    distribution = distribution[1:]
    length = distribution.size
    # 定义 KL 散度
    kl_divergence = np.zeros(length-target_bin)
    # 遍历 [127:2047]
    for threshold in range(target_bin, length):
        sliced_nd_hist = copy.deepcopy(distribution[:threshold])
        # 复制切分分布
        p = sliced_nd_hist.copy()
        threshold_sum = sum(distribution[:threshold])
        
        # 边界外的组加到边界p[i-1]上, 没有直接
        p[threshold - 1] += threshold_sum
        is_nonzeros = (p != 0).astype(np.int64)
        
        # 合并bins, 步长为: num_merged_bins = sliced_nd_hist.size // target_bin = 16
        quantized_bins = np.zeros(target_bin, dtype=np.int64)
        num_merged_bins = sliced_nd_hist.size // target_bin
        
        for j in range(target_bin):
            start = j * num_merged_bins
            stop = start + num_merged_bins
            quantized_bins[j] = sliced_nd_hist[start:stop].sum()
            quantized_bins[-1] += sliced_nd_hist[target_bin * num_merged_bins:].sum()
            
            # 定义分布： q, 这里的size 要和 p 分布一致, 也就是和sliced_hd_hist 分布一致
            q = np.zeros(sliced_nd_hist.size, dtype=np.float64)
                
            # 根据步长结合 p 的 非零以及 quant_p 来以步长填充 q
            for j in range(target_bin):
                start = j * num_merged_bins
                stop = -1 if j == target_bin - 1 else start + num_merged_bins
                norm = is_nonzeros[start:stop].sum()
                q[start:stop] = float(quantized_bins[j]) / float(norm) if norm != 0 else q[start:stop]
                
                p = p / sum(p)
                q = q / sum(q)
                
                # 计算 KL 散度
                kl_divergence[threshold - target_bin] = KL(p, q)
                
        min_kl_divergence = np.argmin(kl_divergence)
        threshold_value = min_kl_divergence + target_bin
        
        scale = (threshold_value + 0.5) * (amax / 2048) / 127.0
        
        return scale
    

In [47]:
x = np.random.seed(1)

weights = np.random.randn(1000)

In [48]:
scale_2 = histgram_range(weights)
scale_3 = entropy(weights)
print(scale_2, scale_3)

  q = q / sum(q)
  t += pk[i] * np.log(pk[i]) - pk[i] * np.log(qk[i])
  t += pk[i] * np.log(pk[i]) - pk[i] * np.log(qk[i])


0.022388924156632888 0.0019557411396902615


In [85]:
### compare

import numpy as np 
import copy


# Max 校准 
def maxq(value):
    dynamic_range = np.abs(value).max()
    scale = dynamic_range / 127.0
    return scale

# 直方图校准 
def histogramq(value):
    # 计算直方图
    hist, bins = np.histogram(value, 100)
    total = len(value)
    left, right = 0, len(hist)
    limit = 0.99
    while True:
        nleft = left + 1
        nright = right + 1
        left_cover = hist[nleft:right].sum() / total
        right_cover = hist[left:nright].sum() / total
        # 判断是否 left 和 right 都小于limit 的限度，True 退出
        if left_cover < limit and right_cover < limit:
            break
        if left_cover > right_cover:
            left += 1
        else:
            right -= 1
                
        # 根据直方图占比和limit 计算的left 和right 边界，确定value 中的数值边界
        low, high = bins[left], bins[right - 1]
        # 计算最大绝对值边界
        dynamic_range = max(abs(low), abs(high))
        # 计算scale
        scale = dynamic_range / 127.0
        return scale
    
    
# KL 散度校准 
def KL(p, q):
    pk = 1.0 * p / np.sum(p)
    qk = 1.0 * q / np.sum(q)
    t = 0
    for i in range(pk.shape[0]):
        t += pk[i] * np.log(pk[i]) - pk[i] * np.log(qk[i])
    return t

def entropy(value, target_bin=128):
    # 计算最大绝对值
    amax = np.abs(value).max()
    # 计算直方图分布
    distribution, _ = np.histogram(value, bins=2048, range = (0, amax))
    # 遍历直方图分布
    distribution = distribution[1:]
    length = distribution.size
    # 定义KL散度
    kl_divergence = np.zeros(length - target_bin)
    # 遍历 [128:2047]
    for threshold in range(target_bin, length):
        sliced_nd_hist = copy.deepcopy(distribution[:threshold])
        # 复制切分分布为：p
        p = sliced_nd_hist.copy()
        threshold_sum = sum(distribution[threshold:])
        
#         边界外的组加到边界p[i-1]上，没有直接
        p[threshold - 1] += threshold_sum
        is_nonzeros = (p != 0).astype(np.int64)
        
#   合并bins, 步长为： num_merged_bins = sliced_nd_hist.size // target_bin = 16
        quantized_bins = np.zeros(target_bin, dtype=np.int64)
        num_merged_bins = sliced_nd_hist.size // target_bin
        
        for j in range(target_bin): 
            start = j * num_merged_bins
            stop = start + num_merged_bins
            quantized_bins[j] = sliced_nd_hist[start:stop].sum()
            quantized_bins[-1] += sliced_nd_hist[target_bin * num_merged_bins:].sum()
            
            # 定义分布: q , 这里的size 要和p分布一致， 也就是和sliced_hd_hist 分布一致
            q = np.zeros(sliced_nd_hist.size, dtype=np.float64)
            
        # 根据步长结合p的非零以及 quant_p, 来以步长填充 q
        for j in range(target_bin):
            start = j * num_merged_bins
            stop = -1 if j == target_bin - 1 else start + num_merged_bins
            norm = is_nonzeros[start:stop].sum()
            q[start:stop] = float(quantized_bins[j]) / float(norm) if norm != 0 else q[start:stop]
            
        p = p / sum(p)
        q = q / sum(q)
        
        # 计算KL散度
        kl_divergence[threshold - target_bin] = KL(p, q)
        
    min_kl_divergence = np.argmin(kl_divergence)
    threshold_value = min_kl_divergence + target_bin
    scale = (threshold_value + 0.5) * (amax / 2048) / 127.0
        
    return scale
        
# int8截断， 注意，-128去调不要 
def saturate(x):
    return np.clip(np.round(x), -127, +127)


class Quant:
    def __init__(self, value, s='max') -> None:
        if s == 'max':
            self.scale = maxq(value)
        if s == 'histogram':
            self.scale = histogramq(value)
        if s == 'entropy':
            self.scale = entropy(value)
    
    def __call__(self, f):
        return saturate(f / self.scale)
    
        
def Quant_Conv(x, w, b, iq, wq, oq=None):
    alpha = iq.scale * wq.scale
    out_int32 = iq(x) * wq(w)
    if oq is None:  
        return out_int32 * alpha + b
    else:
        return saturate((out_int32 * alpha + b) / oq.scale)
        
        
if __name__ == '__main__':
    nelem = 1000
    for s in ['entropy', 'histogram', 'max']:
    
#     for s in ['histogram', 'max']:
        np.random.seed(1)
        # 生成随机权重、输入与偏置向量
        x = np.random.randn(nelem)
        weight1 = np.random.randn(nelem)
        bias1 = np.random.randn(nelem)
        
        # 计算第一层卷积计算的结果输出（fp32)
        t = x * weight1 + bias1
        weight2 = np.random.randn(nelem)
        bias2 = np.random.randn(nelem)
        
        
        # 计算第二层卷积计算的结果输出（fp32)
        y = t * weight2 + bias2
        # 分别对输入、权重以及中间层输出（也是下一层的输入）进行量化校准
        xQ = Quant(x, s)
        w1Q = Quant(weight1, s)

        tQ = Quant(t, s)
        w2Q = Quant(weight2, s)
        qt = Quant_Conv(x, weight1, bias1, xQ, w1Q, tQ)
        # int8计算的结果输出
        y2 = Quant_Conv(qt, weight2, bias2, tQ, w2Q)
        # 计算量化计算的均方差
        y_diff = (np.abs(y-y2) ** 2).mean()
        print(s, " mse error: ", y_diff)

  t += pk[i] * np.log(pk[i]) - pk[i] * np.log(qk[i])
  t += pk[i] * np.log(pk[i]) - pk[i] * np.log(qk[i])


entropy  mse error:  1.8921579013846344
histogram  mse error:  21.659925933513303
max  mse error:  23.191319960042705
