# 量化矩阵相乘

In [2]:
import numpy as np
from concrete.ml.quantization.quantizers import(
    QuantizedArray,
    MinMaxQuantizationStats,
    QuantizationOptions,
    UniformQuantizationParameters
)

## 创建两个矩阵

In [3]:
# 创建两个浮点数矩阵
matrix1 = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64)
matrix2 = np.array([[5.0, 6.0], [7.0, 8.0]], dtype=np.float64)

# 使用numpy.dot函数计算矩阵相乘的结果
result_dot = np.dot(matrix1, matrix2)

# 或者使用@运算符计算矩阵相乘的结果
result_at = matrix1 @ matrix2

print("矩阵1:")
print(matrix1)

print("\n矩阵2:")
print(matrix2)

矩阵1:
[[1. 2.]
 [3. 4.]]

矩阵2:
[[5. 6.]
 [7. 8.]]


## 将矩阵1和矩阵2量化

In [4]:
options=QuantizationOptions(8,is_symmetric=True,is_signed=True)
q_matrix1 = QuantizedArray(8,matrix1,options=options)
q_matrix2 = QuantizedArray(8,matrix2,options=options)
print("q_matrix1:",q_matrix1.qvalues)
print("q_matrix2:",q_matrix2.qvalues)

q_matrix1: [[ 32  64]
 [ 95 127]]
q_matrix2: [[ 79  95]
 [111 127]]


矩阵量化参数

In [5]:
print("q_matrix1:",q_matrix1.quantizer.dump_dict())
print("*"*100)
print("q_matrix2:",q_matrix2.quantizer.dump_dict())

q_matrix1: {'n_bits': 8, 'is_signed': True, 'is_symmetric': True, 'is_qat': False, 'is_narrow': False, 'is_precomputed_qat': False, 'rmax': 4.0, 'rmin': 1.0, 'uvalues': array([1., 2., 3., 4.]), 'scale': 0.031496062992125984, 'zero_point': 0, 'offset': 128, 'no_clipping': False}
****************************************************************************************************
q_matrix2: {'n_bits': 8, 'is_signed': True, 'is_symmetric': True, 'is_qat': False, 'is_narrow': False, 'is_precomputed_qat': False, 'rmax': 8.0, 'rmin': 5.0, 'uvalues': array([5., 6., 7., 8.]), 'scale': 0.06299212598425197, 'zero_point': 0, 'offset': 128, 'no_clipping': False}


## 量化矩阵相乘

In [6]:
q_result = q_matrix1.qvalues @ q_matrix2.qvalues
print("q_result:",q_result)

q_result: [[ 9632 11168]
 [21602 25154]]


## 反量化矩阵相乘的结果

In [7]:
result = q_result*q_matrix1.quantizer.scale * q_matrix2.quantizer.scale
print("result:",result)

result: [[19.10992622 22.15735631]
 [42.85845372 49.90563581]]


量化矩阵相乘的结果

In [8]:
print("real_result:",q_matrix1.values @ q_matrix2.values)

real_result: [[19. 22.]
 [43. 50.]]


## 使用QuantizedMatMul计算矩阵相乘

In [9]:
from concrete.ml.quantization.quantized_ops import (
    QuantizedMatMul,
)

In [10]:
q_mm=QuantizedMatMul(
    8,
    "Test_"+"QuantizedMatmul",
    int_input_names={"0"},
    constant_inputs={"b": q_matrix2},
)
expected_mm_outputs = q_mm.calibrate(matrix1)
print("expected_gemm_outputs:",expected_mm_outputs)
actual_mm_outputs = q_mm(q_matrix1)
print("actual_gemm_outputs:",actual_mm_outputs.dequant())

expected_gemm_outputs: [[19. 22.]
 [43. 50.]]
actual_gemm_outputs: [[18.96470588 22.00392157]
 [42.91372549 49.96470588]]


## 使用QuantizedGemm计算矩阵相乘

In [11]:
from concrete.ml.quantization.quantized_ops import (
    QuantizedGemm,
)

In [12]:
q_gemm = QuantizedGemm(
        8,
        "Test_" + "QuantizedGemm",
        int_input_names={"0"},
        constant_inputs={"b": q_matrix2},
    )
expected_gemm_outputs = q_gemm.calibrate(matrix1)
print("expected_gemm_outputs:",expected_gemm_outputs)
actual_gemm_outputs = q_gemm(q_matrix1)
print("actual_gemm_outputs:",actual_gemm_outputs.dequant())

expected_gemm_outputs: [[19. 22.]
 [43. 50.]]
actual_gemm_outputs: [[18.96470588 22.00392157]
 [42.91372549 49.96470588]]


## 将矩阵相乘编译成电路

In [13]:
from concrete import fhe
import numpy as np

In [14]:
weight=np.random.rand(576,10)
bias=np.random.rand(10)
print("weight:",weight)
print("bias:",bias)


weight: [[0.98842096 0.08765085 0.26070036 ... 0.0389806  0.44675294 0.09080959]
 [0.59623538 0.82786726 0.29212342 ... 0.44835206 0.76161856 0.58008714]
 [0.55646692 0.42872063 0.16046165 ... 0.80671794 0.01174179 0.50573076]
 ...
 [0.33913304 0.26573855 0.49188787 ... 0.87890236 0.54885496 0.10563837]
 [0.42444985 0.60750798 0.93760933 ... 0.86801    0.44521928 0.34956334]
 [0.11571159 0.1570743  0.30780795 ... 0.12707277 0.45624764 0.26753028]]
bias: [0.86136278 0.57887083 0.30488831 0.07688525 0.30086415 0.21278767
 0.35213633 0.20966626 0.1043467  0.46128351]


In [46]:
options=QuantizationOptions(8,is_symmetric=True,is_signed=True)
q_weight=QuantizedArray(8,weight,options=options)
print("q_weight:",q_weight.qvalues)

q_weight: [[126  11  33 ...   5  57  12]
 [ 76 105  37 ...  57  97  74]
 [ 71  54  20 ... 102   1  64]
 ...
 [ 43  34  62 ... 112  70  13]
 [ 54  77 119 ... 110  57  44]
 [ 15  20  39 ...  16  58  34]]


In [47]:
configuration = fhe.Configuration(
    enable_unsafe_features=True,
    show_mlir=False,
    show_graph=True,
)
@fhe.compiler({"q_x": "encrypted"})
def f_lr(q_x):
    res = q_x @ q_weight.qvalues
    return res

In [48]:
inputset = [np.random.randint(-128, 128, size=(576,)) for i in range(10000)]

In [49]:
circuit = f_lr.compile(inputset,configuration=configuration)


Computation Graph
--------------------------------------------------------------------------------
%0 = q_x                              # EncryptedTensor<int8, shape=(576,)>        ∈ [-128, 127]
%1 = [[126  11  ...   58  34]]        # ClearTensor<uint7, shape=(576, 10)>        ∈ [0, 127]
%2 = matmul(%0, %1)                   # EncryptedTensor<int21, shape=(10,)>        ∈ [-721833, 607319]
return %2
--------------------------------------------------------------------------------



In [50]:
input=np.random.randn(576,)
print("input:",input.shape)

input: (576,)


In [51]:
q_input=QuantizedArray(8,input,options=options)
print("q_input:",q_input.qvalues)

q_input: [ -53   58  -17  -72  -28   37   15  -26  -12   75   43   34  -50  -43
   -4   24  -36  -32    9  -16   65   37   69   14  -13  -57   52  -20
   33   -5  -46  118  -33  -67  -33    1   16  -17   34   16   25    0
   71  -41   16   42   52  -73  -21  -29  -58   24   -6  -92   67  -23
  -18  -19  108   27   24  -99  -14  -20   81  -41    9  -16   -2   20
   12  -26   24   18   73   70  -69   67  -38  -11   15    4    5   18
  -32   49  -51   42   10   35   12   22  -25  -51  -24   37  -38   10
  -18  -10  -34  -22  -68   30  -83   20   19  -66   15   18    4  -39
   11   59  -17  -23   29  -33   25  -62  -14   18   -2    8   29  -10
   25  -15   24   -8  -26  -12   30   30  -40   23   -2  -79   42   37
  -60  -21   61   32  -66   -1  -18    2  -11   14    8  -47   -5   52
    2  -29   29   89   42   25   -8   28    1  -14   34   54   41   88
   42   21    2   47   14  -10  -21   76  -75  -56    3  -38   34  -16
   42   -3    4   98  -43   16  -35   28   19  -41   65  -14   26  -

In [52]:
q_result=circuit.encrypt_run_decrypt(q_input.qvalues)
deq_result=q_result*q_weight.quantizer.scale*q_input.quantizer.scale
print("deq_result:",deq_result)
print("real_result:",input@weight)

deq_result: [-12.74770905 -19.73274707 -14.32666489  -0.81009169 -12.97768443
 -21.16769964   0.46503092  -0.72744123 -16.79035183   3.24720543]
real_result: [-12.71593865 -19.60519293 -14.17264809  -0.76435181 -13.06317095
 -20.97342758   0.54236164  -0.61235716 -16.87038662   3.29810105]


## 添加偏置

In [69]:
stats=MinMaxQuantizationStats()
stats.compute_quantization_stats(input@weight)
options=QuantizationOptions(8,is_symmetric=True,is_signed=True)
q_bias=QuantizedArray(8,bias,options=options,stats=stats)
print("q_bias:",q_bias.quantizer.dump_dict())

q_bias: {'n_bits': 8, 'is_signed': True, 'is_symmetric': True, 'is_qat': False, 'is_narrow': False, 'is_precomputed_qat': False, 'rmax': 3.2981010498426007, 'rmin': -20.97342757689982, 'uvalues': array([-20.97, -19.61, -16.87, -14.17, -13.06, -12.72,  -0.76,  -0.61,
         0.54,   3.3 ]), 'scale': 0.16514509903070723, 'zero_point': 0, 'offset': 128, 'no_clipping': False}


In [68]:
q_result2=q_input.qvalues@q_weight.qvalues+q_bias.qvalues
print(q_result2)
deq_result2=q_result2*q_weight.quantizer.scale*q_input.quantizer.scale
print(deq_result2)
print(input@weight+bias)

[5 4 2 0 2 1 2 1 1 3]
[5.55065957 4.44052765 2.22026383 0.         2.22026383 1.11013191
 2.22026383 1.11013191 1.11013191 3.33039574]
[-11.85457587 -19.0263221  -13.86775977  -0.68746656 -12.76230681
 -20.76063991   0.89449797  -0.4026909  -16.76603992   3.75938456]


## 溢出问题
在计算矩阵相乘时，元素会经历相乘和相加两个操作，这两个操作都可能导致溢出。在量化矩阵相乘时，我们需要考虑溢出问题。所以在进行量化之前应该输入一个校准数据集，来确定量化参数。

In [60]:
options=QuantizationOptions(8,is_symmetric=True,is_signed=True)
calibration_datasets=inputset@weight
quantized_samples = QuantizedArray(8,calibration_datasets,options=options)

params = quantized_samples.quantizer.quant_params
stats = quantized_samples.quantizer.quant_stats


In [61]:
q_weight=QuantizedArray(8,weight,options=options,params=params,stats=stats)
q_bias=QuantizedArray(8,bias,options=options,params=params,stats=stats)
print("q_weight:",q_weight.qvalues)
print("q_bias:",q_bias.qvalues)

q_weight: [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
q_bias: [0 0 0 0 0 0 0 0 0 0]
