# 量化矩阵相乘

In [1]:
import numpy as np
from concrete.ml.quantization.quantizers import(
    QuantizedArray,
    MinMaxQuantizationStats,
    QuantizationOptions,
    UniformQuantizationParameters
)

## 创建两个矩阵

In [3]:
# Two small float64 operands used as the running example in the cells below.
matrix1 = np.array(
    [
        [1.0, 2.0],
        [3.0, 4.0],
    ],
    dtype=np.float64,
)
matrix2 = np.array(
    [
        [5.0, 6.0],
        [7.0, 8.0],
    ],
    dtype=np.float64,
)

# Float reference product via the `@` operator ...
result_at = matrix1 @ matrix2

# ... and, equivalently, via numpy.dot.
result_dot = np.dot(matrix1, matrix2)

# Show both operands. The blank line before the second label comes from the
# leading "\n" embedded in that label.
print("矩阵1:", matrix1, "\n矩阵2:", matrix2, sep="\n")

矩阵1:
[[1. 2.]
 [3. 4.]]

矩阵2:
[[5. 6.]
 [7. 8.]]


## 将矩阵1和矩阵2量化

In [4]:
# Symmetric, signed 8-bit quantization settings shared by both operands.
options = QuantizationOptions(8, is_symmetric=True, is_signed=True)

# Quantize each float matrix on its own, so each gets its own quantizer
# (and therefore its own scale).
q_matrix1, q_matrix2 = (
    QuantizedArray(8, float_matrix, options=options)
    for float_matrix in (matrix1, matrix2)
)

# Integer representations of the two operands.
print("q_matrix1:", q_matrix1.qvalues)
print("q_matrix2:", q_matrix2.qvalues)

q_matrix1: [[ 32  64]
 [ 95 127]]
q_matrix2: [[ 79  95]
 [111 127]]


## 矩阵量化参数

In [5]:
# Collect the quantizer settings (scale, zero point, range, ...) of each operand.
matrix1_quantizer_params = q_matrix1.quantizer.dump_dict()
matrix2_quantizer_params = q_matrix2.quantizer.dump_dict()

# Print them one after the other, separated by a 100-character rule.
print("q_matrix1:", matrix1_quantizer_params)
print("*" * 100)
print("q_matrix2:", matrix2_quantizer_params)

q_matrix1: {'n_bits': 8, 'is_signed': True, 'is_symmetric': True, 'is_qat': False, 'is_narrow': False, 'is_precomputed_qat': False, 'rmax': 4.0, 'rmin': 1.0, 'uvalues': array([1., 2., 3., 4.]), 'scale': 0.031496062992125984, 'zero_point': 0, 'offset': 128, 'no_clipping': False}
****************************************************************************************************
q_matrix2: {'n_bits': 8, 'is_signed': True, 'is_symmetric': True, 'is_qat': False, 'is_narrow': False, 'is_precomputed_qat': False, 'rmax': 8.0, 'rmin': 5.0, 'uvalues': array([5., 6., 7., 8.]), 'scale': 0.06299212598425197, 'zero_point': 0, 'offset': 128, 'no_clipping': False}


## 量化矩阵相乘

In [6]:
# Multiply the integer representations directly; the product is still in
# quantized units and is turned back into floats in the next cell.
q_result = np.matmul(q_matrix1.qvalues, q_matrix2.qvalues)
print("q_result:", q_result)

q_result: [[ 9632 11168]
 [21602 25154]]


## 反量化矩阵相乘的结果

In [7]:
# Map the integer product back to floats by applying the scale of each operand
# in turn. No zero-point correction is applied here; the quantizer dumps above
# report zero_point 0 for both operands.
result = (q_result * q_matrix1.quantizer.scale) * q_matrix2.quantizer.scale
print("result:", result)

result: [[19.10992622 22.15735631]
 [42.85845372 49.90563581]]


## 浮点矩阵相乘的真实结果（用于与上面的反量化结果对比）

In [8]:
# Float reference: product of the `.values` arrays kept on the quantized
# operands, for comparison with the dequantized result above.
print(
    "real_result:",
    np.matmul(q_matrix1.values, q_matrix2.values),
)

real_result: [[19. 22.]
 [43. 50.]]


## 使用QuantizedMatMul计算矩阵相乘

In [9]:
from concrete.ml.quantization.quantized_ops import (
    QuantizedMatMul,
)

In [10]:
# Quantized matmul operator with 8 output bits; the already-quantized matrix2
# is bound as the constant right-hand operand "b".
q_mm = QuantizedMatMul(
    8,
    "Test_" + "QuantizedMatmul",
    int_input_names={"0"},
    constant_inputs={"b": q_matrix2},
)

# Calibrate the operator on the float left-hand operand and show what
# calibrate() returns.
# NOTE(review): the printed labels say "gemm" although this cell exercises
# QuantizedMatMul; they look copied from the Gemm cell.
expected_mm_outputs = q_mm.calibrate(matrix1)
print("expected_gemm_outputs:", expected_mm_outputs)

# Run the operator on the quantized input and dequantize its output for
# comparison with the calibration result.
actual_mm_outputs = q_mm(q_matrix1)
print("actual_gemm_outputs:", actual_mm_outputs.dequant())

expected_gemm_outputs: [[19. 22.]
 [43. 50.]]
actual_gemm_outputs: [[18.96470588 22.00392157]
 [42.91372549 49.96470588]]


## 使用QuantizedGemm计算矩阵相乘

In [11]:
from concrete.ml.quantization.quantized_ops import (
    QuantizedGemm,
)

In [12]:
# Quantized Gemm operator with 8 output bits; the already-quantized matrix2 is
# bound as the constant operand "b". No bias input is supplied in this cell.
q_gemm = QuantizedGemm(
    8,
    "Test_" + "QuantizedGemm",
    int_input_names={"0"},
    constant_inputs={"b": q_matrix2},
)

# Calibrate on the float left-hand operand and show what calibrate() returns.
expected_gemm_outputs = q_gemm.calibrate(matrix1)
print("expected_gemm_outputs:", expected_gemm_outputs)

# Run the operator on the quantized input and dequantize its output.
actual_gemm_outputs = q_gemm(q_matrix1)
print("actual_gemm_outputs:", actual_gemm_outputs.dequant())

expected_gemm_outputs: [[19. 22.]
 [43. 50.]]
actual_gemm_outputs: [[18.96470588 22.00392157]
 [42.91372549 49.96470588]]


## 将矩阵相乘编译成电路

In [13]:
from concrete import fhe
import numpy as np

In [14]:
# Seed numpy's global RNG so that the random weights and bias (and the later
# np.random draws in this notebook, when cells are run in order) are the same
# on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Random float parameters: a (576, 10) weight matrix and a length-10 bias,
# both drawn uniformly from [0, 1).
weight = np.random.rand(576, 10)
bias = np.random.rand(10)
print("weight:", weight)
print("bias:", bias)


weight: [[0.29866332 0.53118062 0.21773419 ... 0.29728277 0.88437553 0.46548094]
 [0.80666771 0.2900616  0.76745915 ... 0.09926808 0.81027607 0.43771638]
 [0.07560522 0.09372396 0.21049474 ... 0.66247873 0.00188731 0.90234339]
 ...
 [0.59346738 0.72741228 0.11590441 ... 0.64907804 0.78052024 0.69502247]
 [0.66344919 0.95057771 0.93593391 ... 0.69857797 0.0895137  0.67533205]
 [0.07059111 0.1754342  0.97671722 ... 0.50604694 0.02302833 0.878599  ]]
bias: [0.77533138 0.6681513  0.95731044 0.18328553 0.1158954  0.09975622
 0.86337917 0.134197   0.11127182 0.2222513 ]


In [15]:
# Quantize the float weights to 8 bits with symmetric, signed settings.
options = QuantizationOptions(
    8,
    is_symmetric=True,
    is_signed=True,
)
q_weight = QuantizedArray(8, weight, options=options)

# Integer representation of the weights.
print("q_weight:", q_weight.qvalues)

q_weight: [[ 38  67  28 ...  38 112  59]
 [102  37  97 ...  13 103  56]
 [ 10  12  27 ...  84   0 115]
 ...
 [ 75  92  15 ...  82  99  88]
 [ 84 121 119 ...  89  11  86]
 [  9  22 124 ...  64   3 112]]


In [16]:
# Compiler settings: print the computation graph but not the MLIR.
configuration = fhe.Configuration(
    show_graph=True,
    show_mlir=False,
    enable_unsafe_features=True,
)


@fhe.compiler({"q_x": "encrypted"})
def f_lr(q_x):
    """Multiply the quantized input by the quantized weight matrix.

    Args:
        q_x: Quantized input; declared as encrypted in the decorator.

    Returns:
        ``q_x @ q_weight.qvalues``. ``q_weight`` is read from the notebook's
        global scope, so the result depends on whatever ``q_weight`` is bound
        to when the function is evaluated.
    """
    return q_x @ q_weight.qvalues

In [17]:
# Input set passed to the compiler below: 10000 random integer vectors of
# length 576 with values in [-128, 127] (the upper bound of randint is exclusive).
inputset = [np.random.randint(-128, 128, size=576) for _ in range(10000)]

In [18]:
# Compile f_lr into a circuit using the input set and the configuration
# defined above (show_graph is enabled there, so the graph is printed).
circuit = f_lr.compile(
    inputset,
    configuration=configuration,
)


Computation Graph
--------------------------------------------------------------------------------
%0 = q_x                              # EncryptedTensor<int8, shape=(576,)>        ∈ [-128, 127]
%1 = [[ 38  67  ...    3 112]]        # ClearTensor<uint7, shape=(576, 10)>        ∈ [0, 127]
%2 = matmul(%0, %1)                   # EncryptedTensor<int21, shape=(10,)>        ∈ [-594815, 555436]
return %2
--------------------------------------------------------------------------------



In [19]:
# One float test vector of length 576 drawn from the standard normal distribution.
# NOTE(review): the name `input` shadows the Python builtin; it is kept because
# later cells reference it.
input = np.random.randn(576)
print("input:", input.shape)

input: (576,)


In [20]:
# Quantize the float test vector to 8 bits with the current `options`.
q_input = QuantizedArray(8, input, options=options)

# Integer representation that will be fed to the circuit.
print("q_input:", q_input.qvalues)

q_input: [  -8   26    4   -5  -77   14  -35   29  -56   27    3    2  -31  -10
  -26   -6   43  -15   11   11  -57   19   18   -8    5   81  -18   -7
  -13 -103   31   27   11   15  -71  -26  -19   14  -69   73  -15   36
  -33  -63   -6  -37   72   22    9  -28  -78  -25  -29  -52  -39  -15
   25  -26  -21  -20    4   -1  -37   65   -3   43   37    8  -56  -17
  -33  -53  -43  -70  -20   11    6   19   64   23  -29   47  -60  -37
  -46   18    3   22   -4   -1   53   64  -52  -13   24   -9   16  -12
   -8  -10   11  -28  -56  -55    6  -83    0  -21   82   12    8   -9
  -20  -15    7   24   -5  -29  -14    5   34  -10   34   41    0  -44
  -54   60   -4  -23  -56  -82   20  -11  -26   45    9   15  -63  -20
   10   36   35    9  -28   73  -40   -3  -29   13  -25   17    3   37
    6  -91   -2  -40   67    7   19   22  -27   17  -83    6   -3  -40
    3 -115   32   56  -33   31  -52    9   67   18 -120  -24   25    8
  -21   21  -10  -15   29   13    1   21   30  -19   22  -33  -70   

In [35]:
import time

# Time one full encrypt -> run -> decrypt round trip. perf_counter() is used
# instead of time.time() because it is the high-resolution clock intended for
# measuring short durations and is not affected by system clock adjustments.
start = time.perf_counter()
q_result = circuit.encrypt_run_decrypt(q_input.qvalues)
end = time.perf_counter()
print("time", end - start)

# Turn the integer circuit output back into floats using the weight and input scales.
deq_result = q_result * q_weight.quantizer.scale * q_input.quantizer.scale
print("deq_result:", deq_result)

# Float reference for comparison.
print("real_result:", input @ weight)

time 0.029926538467407227
deq_result: [-21.91856705 -33.19481763 -31.41169982 -31.54911328 -27.04134096
 -30.13080276 -31.79055382 -31.19204185 -17.91321907 -26.98332194]
real_result: [-22.02448833 -33.28010295 -31.24053553 -31.28968421 -27.15589172
 -30.02334052 -31.76845443 -31.15041306 -17.93298441 -26.89012417]


## 添加偏置

In [None]:
# Same symmetric, signed 8-bit settings as before.
options = QuantizationOptions(8, is_symmetric=True, is_signed=True)

# Compute the quantization statistics from the float output (input @ weight)
# rather than from the bias itself.
stats = MinMaxQuantizationStats()
stats.compute_quantization_stats(input @ weight)

# Quantize the bias using those statistics.
# NOTE(review): this ties the bias scale to the float output range; it is not
# the scale of the integer accumulator (weight scale * input scale) — confirm
# this is the intended scale before adding q_bias.qvalues to an integer matmul.
q_bias = QuantizedArray(8, bias, options=options, stats=stats)
print("q_bias:", q_bias.quantizer.dump_dict())

In [None]:
# Float value of one integer step of the accumulator (q_input @ q_weight).
# This relies on both quantizers being symmetric (zero_point 0, as requested
# by is_symmetric=True in the options used for q_input and q_weight).
acc_scale = q_weight.quantizer.scale * q_input.quantizer.scale

# The bias has to be expressed at the accumulator scale before it can be added
# to the integer matmul. q_bias.qvalues lives at a different (8-bit,
# output-range) scale, so adding it directly would make the bias nearly vanish
# once the sum is multiplied by acc_scale.
q_bias_acc = np.rint(bias / acc_scale).astype(np.int64)

# Integer matmul plus integer bias.
q_result2 = q_input.qvalues @ q_weight.qvalues + q_bias_acc
print(q_result2)

# Dequantize with the accumulator scale.
deq_result2 = q_result2 * acc_scale
print(deq_result2)

# Float reference for comparison.
print(input @ weight + bias)

## 溢出问题
在计算矩阵相乘时，每个输出元素都是多个乘积的累加和：两个 8 位整数相乘的结果已经需要约 16 位，再把 576 个乘积相加后位宽还会进一步增大（例如上文计算图中的累加结果为 int21）。因此相乘和相加这两个操作都可能导致溢出，在量化矩阵相乘时必须考虑溢出问题。所以在进行量化之前，应该先输入一个校准数据集，根据实际的数值范围来确定量化参数。

In [None]:
# Same symmetric, signed 8-bit settings as before.
options = QuantizationOptions(8, is_symmetric=True, is_signed=True)

# Float outputs obtained by multiplying every vector of the input set by the
# float weights; these serve as the calibration data.
calibration_datasets = np.matmul(inputset, weight)

# Quantizing the calibration outputs yields a quantizer whose parameters and
# statistics are read back below.
quantized_samples = QuantizedArray(8, calibration_datasets, options=options)

# Note: this rebinds `stats`, which an earlier cell also assigned.
stats = quantized_samples.quantizer.quant_stats
params = quantized_samples.quantizer.quant_params


In [None]:
# Re-quantize the weights and the bias, passing the parameters and statistics
# taken from the calibration cell.
# NOTE(review): `params` and `stats` were derived from the calibration outputs
# (inputset @ weight), not from `weight` or `bias` themselves — confirm that
# reusing them for both arrays is intended.
# NOTE(review): this rebinds `q_weight`, the global that f_lr reads.
q_weight = QuantizedArray(
    8,
    weight,
    options=options,
    params=params,
    stats=stats,
)
q_bias = QuantizedArray(
    8,
    bias,
    options=options,
    params=params,
    stats=stats,
)

print("q_weight:", q_weight.qvalues)
print("q_bias:", q_bias.qvalues)