In [5]:
import numpy as np
import sys
# Raise the summarization threshold to the largest possible value so NumPy
# prints every element of an array instead of eliding the middle with "...".
np.set_printoptions(threshold=sys.maxsize)

def compute_scale_and_zero_point(min_value, max_value, num_levels=255):
    """Derive asymmetric (affine) quantization parameters for a value range.

    The real interval ``[min_value, max_value]`` is mapped linearly onto the
    integer codes ``0 .. num_levels`` such that
    ``real ~= (code - zero_point) * scale``.

    Args:
        min_value: Smallest real value to represent.
        max_value: Largest real value to represent.
        num_levels: Highest integer code (255 for an 8-bit unsigned range).

    Returns:
        A ``(scale, zero_point)`` tuple: ``scale`` is the real-valued step
        between adjacent codes and ``zero_point`` is the integer code that
        represents the real value 0.

    Raises:
        ValueError: If ``num_levels`` is not positive or the range is inverted.
    """
    if num_levels < 1:
        raise ValueError("num_levels must be a positive integer")
    if max_value < min_value:
        raise ValueError("max_value must be greater than or equal to min_value")

    if max_value == min_value:
        # Degenerate (constant) input: the naive formula would divide by a
        # zero scale. Widen the range so it also contains 0, which keeps the
        # constant exactly representable at one end of the code range.
        if min_value == 0:
            return 1.0, 0
        min_value = min(min_value, 0.0)
        max_value = max(max_value, 0.0)

    scale = (max_value - min_value) / num_levels
    # int() guarantees a plain Python integer regardless of the scalar type
    # (Python float or NumPy float) that was passed in.
    zero_point = int(round(-min_value / scale))
    return scale, zero_point

def quantize(matrix, scale, zero_point, num_levels=255):
    """Quantize real values to unsigned 8-bit codes (affine quantization).

    Each element is mapped with ``code = round(value / scale + zero_point)``
    and then saturated to ``[0, num_levels]``.

    Args:
        matrix: Array-like of real values.
        scale: Real-valued step between adjacent codes (must be non-zero).
        zero_point: Integer code that represents the real value 0.
        num_levels: Highest code to emit; capped at 255 because the result is
            stored as ``uint8``.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with the same shape as ``matrix``.

    Raises:
        ValueError: If ``scale`` is zero.
    """
    if scale == 0:
        raise ValueError("scale must be non-zero")

    values = np.asarray(matrix, dtype=np.float64)

    # The zero point already carries the range offset, so the matrix minimum
    # must not be subtracted as well (that would shift every code twice).
    codes = np.round(values / scale + zero_point)

    # Saturate while still in floating point: casting to uint8 first would
    # wrap out-of-range values modulo 256 and make the clip a no-op.
    upper = min(num_levels, np.iinfo(np.uint8).max)
    codes = np.clip(codes, 0, upper)
    return codes.astype(np.uint8)

def dequantize(quantized_matrix, scale, zero_point):
    """Recover the floating-point approximation of quantized codes.

    Applies ``value = (code - zero_point) * scale`` element-wise.

    Args:
        quantized_matrix: Array-like of integer codes (e.g. ``uint8``).
        scale: Real-valued step between adjacent codes.
        zero_point: Integer code that represents the real value 0.

    Returns:
        ``np.ndarray`` of dtype ``float64`` with the same shape as the input.
    """
    # Widen to float before subtracting: in uint8 arithmetic any code below
    # the zero point would wrap around to a large positive number.
    codes = np.asarray(quantized_matrix).astype(np.float64)

    # No extra offset is added here; the zero point is the only shift in the
    # affine mapping.
    return (codes - zero_point) * scale

# --- Inputs: two random double-precision operands -------------------------
# Seeding first makes every run draw the same values; A is drawn before B.
np.random.seed(42)
matrix_A = np.random.uniform(-5, 5, size=(32, 64))
matrix_B = np.random.uniform(-5, 5, size=(64, 32))

# --- Matrix A: range-derived parameters, then quantization ----------------
scale_A, zero_point_A = compute_scale_and_zero_point(matrix_A.min(), matrix_A.max())
quantized_A = quantize(matrix_A, scale_A, zero_point_A)

# --- Matrix B: same treatment, using its own range ------------------------
scale_B, zero_point_B = compute_scale_and_zero_point(matrix_B.min(), matrix_B.max())
quantized_B = quantize(matrix_B, scale_B, zero_point_B)

# --- Product: computed from the floating-point operands (not the quantized
# ones), then quantized with parameters taken from the product's own range.
output_matrix = matrix_A.dot(matrix_B)
scale_output, zero_point_output = compute_scale_and_zero_point(
    output_matrix.min(), output_matrix.max()
)
quantized_output = quantize(output_matrix, scale_output, zero_point_output)

# --- Report ---------------------------------------------------------------
# Full matrices first (inputs, then their quantized forms) ...
for _title, _values in (
    ("Matrix A (32x64):\n", matrix_A),
    ("\nMatrix B (64x32):\n", matrix_B),
    ("\nQuantized Matrix A (uint8):\n", quantized_A),
    ("\nQuantized Matrix B (uint8):\n", quantized_B),
):
    print(_title, _values)

# ... then the quantization parameters for each matrix ...
print("\nScale and Zero Point for Matrix A: Scale = {:.5f}, Zero Point = {}".format(scale_A, zero_point_A))
print("\nScale and Zero Point for Matrix B: Scale = {:.5f}, Zero Point = {}".format(scale_B, zero_point_B))
print("\nScale and Zero Point for Output Matrix: Scale = {:.5f}, Zero Point = {}".format(scale_output, zero_point_output))

# ... and finally the quantized product.
print("\nQuantized Output Matrix (uint8):\n", quantized_output)


Matrix A (32x64):
 [[-1.25459881  4.50714306  2.31993942  0.98658484 -3.4398136  -3.4400548
  -4.41916388  3.66176146  1.01115012  2.08072578 -4.79415506  4.69909852
   3.32442641 -2.87660889 -3.18175033 -3.1659549  -1.95757757  0.24756432
  -0.68054981 -2.0877086   1.11852895 -3.60506139 -2.07855351 -1.33638157
  -0.43930016  2.85175961 -3.00326218  0.14234438  0.92414569 -4.53549587
   1.07544852 -3.29475876 -4.34948407  4.48885537  4.65632033  3.08397348
  -1.95386231 -4.02327886  1.84233027 -0.59847506 -3.77961765 -0.0482309
  -4.65611479  4.09320402 -2.41220018  1.62522284 -1.88288924  0.20068021
   0.46710279 -3.15145544  4.69584628  2.75132823  4.39498942  3.9482735
   0.97899979  4.21874235 -4.11507498 -3.04017138 -4.54772711 -1.74669669
  -1.1132271  -2.28650968  3.28737509 -1.43246673]
 [-2.1906549   0.42696083 -3.59075775  3.02196981 -4.25449356  4.86886937
   2.72244769 -3.01284318 -4.94477883  3.15461428  2.06857344  2.29007168
   2.71270347 -4.25955348 -1.41534271 -3.8413