In [1]:
import numpy as np
from scipy.signal import convolve2d

# Bit widths of the signed activation, weight and output values.
actBits = 8
weightBits = 8
outBits = 8
# Side lengths of the square activation map and of the square kernel.
nActs = 10
nWeights = 3

np.random.seed(0)  # fixed seed so the random operands are reproducible
# np.random.randint excludes `high`, so the upper bound has to be 2**(bits-1)
# for the draw to cover the full signed range [-2**(bits-1), 2**(bits-1)-1].
a = np.random.randint(-2**(actBits-1), 2**(actBits-1), size=(nActs, nActs))
w = np.random.randint(-2**(weightBits-1), 2**(weightBits-1), size=(nWeights, nWeights))
# convolve2d flips its kernel; reversing w along both axes beforehand cancels
# that, so each output is the plain sum of products of a window of `a` with `w`.
o = convolve2d(a, w[::-1, ::-1], mode='valid')

256

In [2]:
import numpy as np
from scipy.signal import convolve2d

# Worst-case accumulation: every activation and every weight is set to the
# largest positive value a signed integer of the given width can hold.
actBits, weightBits = 3, 3
nActs, nWeights = 10, 3

act_max_value = (1 << (actBits - 1)) - 1
weight_max_value = (1 << (weightBits - 1)) - 1

a = np.full(shape=(nActs, nActs), fill_value=act_max_value)
w = np.full(shape=(nWeights, nWeights), fill_value=weight_max_value)

# The kernel is reversed along both axes before being passed to convolve2d.
o = convolve2d(a, w[::-1, ::-1], mode='valid')

Simulation of quantization, following the scheme described in the gemmlowp documentation: https://github.com/google/gemmlowp/blob/master/doc/quantization.md

In [1]:
# Experiments on low-precision convolution

from stim_lib.quant import *

# Bit widths passed to the quantization helpers below.
actBits = 8
weightBits = 8
outBits = 8
# Side lengths of the square activation map and of the square kernel.
nActs = 10
nWeights = 3
# NOTE(review): looks like the side length of an unpadded convolution output;
# it is not used further in this cell.
nOuts = nActs - nWeights + 1

# quantized_tensor, scaling_quantized_convolution and convolve_fake_quantized
# are not defined in this notebook; presumably they (and `np`) come from the
# `from stim_lib.quant import *` at the top of the cell - TODO confirm.
a = quantized_tensor((nActs,nActs),actBits)
w = quantized_tensor((nWeights,nWeights),weightBits)
o = scaling_quantized_convolution(a,w,outBits,internalPrecision=16)

# Evaluate the reference once and reuse it. The difference is kept in a named
# variable because a bare expression in the middle of a cell is computed and
# then silently discarded (only the last expression of a cell is displayed).
reference = convolve_fake_quantized(a,w)
quant_error = o.real_values - reference

# Limit numpy print precision
np.set_printoptions(precision=4)
o.real_values, reference


(array([[-1.9591, -1.8185, -0.4451, -1.2211,  1.2704, -0.7702, -0.5226,
         -0.1785],
        [-1.2139,  1.0255, -1.7152, -0.9167,  1.0068,  0.47  ,  2.4419,
         -0.3409],
        [ 1.6802,  1.4357,  0.6052,  0.9809, -1.3817, -0.3988,  0.3194,
         -1.1724],
        [-0.448 , -1.6343, -2.3635, -0.1885,  0.8053,  0.8301, -1.9913,
          0.1775],
        [ 0.0038,  0.3326,  0.1842, -0.1984, -1.5375, -0.7156,  0.5515,
          0.7964],
        [-0.3247, -1.0181, -0.8814,  1.096 ,  0.6712,  0.4481,  0.2406,
         -0.2588],
        [-1.8406, -0.204 , -0.1667, -0.7701, -1.6541,  1.2851,  0.5958,
          1.6854],
        [ 0.9127, -0.9696, -0.1113, -0.1411,  0.9557,  0.4367, -1.9155,
          0.559 ]]),
 array([[-1.9591, -1.8186, -0.4451, -1.2211,  1.2704, -0.7702, -0.5226,
         -0.1785],
        [-1.2139,  1.0255, -1.7152, -0.9167,  1.0068,  0.47  ,  2.4419,
         -0.3409],
        [ 1.6802,  1.4358,  0.6052,  0.9809, -1.3817, -0.3988,  0.3194,
         -1.1724

In [182]:
# Experiments on the fixed point scaling
# One random scale and one random integer are pushed through an integer-only
# multiply / shift / clip pipeline and compared with the real-valued product.
outBits = 8
inBits = 8
fpBits = 16
numSamples = 10

# Random real scale; the first print compares it against m0 * 2**shift.
out_scale = np.random.uniform(0,5) / (2**outBits)
m0, shift = convert_scale_to_shift_and_m0(out_scale)
print('real vs fixed point\t',out_scale,m0 * 2**shift)
# m0bin is parsed with int(..., base=2) below, i.e. it is a string of binary digits.
m0bin = convert_to_fixed_point(m0,fpBits)
print(f'fixed point m0\t {m0bin}')
print(f'shift\t{shift}')
print(f'm0\t{m0}')

m0int = int(m0bin,base=2)

# Product of two signed inBits-bit operands. np.random.randint excludes `high`,
# so the upper bound is 2**(inBits-1) to make the largest positive value reachable.
test_int = np.random.randint(-2**(inBits-1),2**(inBits-1)) * np.random.randint(-2**(inBits-1),2**(inBits-1))
print('test_int\t',test_int)
scaled = test_int*m0int
# Drop the fpBits fractional bits of the multiplier; `//` rounds toward -infinity.
scaled_clipped = scaled // (2**fpBits)
# NOTE(review): int() truncates toward zero, unlike the floor division above,
# so negative values are rounded differently in the two steps - confirm intended.
scaled_clipped_shifted = int(scaled_clipped * 2**shift)
print('test_int x m0\t',scaled, bin(scaled))
print('shifted by m0 fp\t',scaled_clipped, bin(scaled_clipped))
print('then shifted by shift\t',scaled_clipped_shifted, bin(scaled_clipped_shifted))
out = saturating_clip(scaled_clipped_shifted,outBits=outBits)
print('saturating_clip\t\t',out)
print('m * test_int (ref)\t',test_int*out_scale)


real vs fixed point	 0.010735642631708186 0.010735511779785156
fixed point m0	 1010111111100100
shift	-6
m0	0.68707275390625
test_int	 -10044
test_int x m0	 -452261232 -0b11010111101001111010101110000
shifted by m0 fp	 -6901 -0b1101011110101
then shifted by shift	 -107 -0b1101011
saturating_clip		 -107
m * test_int (ref)	 -107.82879459287702


In [184]:
# Experiments on the fixed point scaling
# Vectorised variant: numSamples random scales and numSamples random integer
# products go through the same multiply / shift / clip steps element-wise.
outBits = 8
inBits = 8
fpBits = 16
numSamples = 10

# The sample count is taken from numSamples instead of a repeated literal 10.
out_scale = np.random.uniform(0,5,numSamples) / (2**outBits)
m0, shift = np.vectorize(convert_scale_to_shift_and_m0)(out_scale)
m0bin = np.vectorize(convert_to_fixed_point)(m0,fpBits)

m0int = np.vectorize(int)(m0bin,base=2)

# Products of two signed inBits-bit operands. np.random.randint excludes `high`,
# so the upper bound is 2**(inBits-1) to make the largest positive value reachable.
test_int = (np.random.randint(-2**(inBits-1),2**(inBits-1),numSamples)
            * np.random.randint(-2**(inBits-1),2**(inBits-1),numSamples))
scaled = test_int*m0int
# Drop the fpBits fractional bits of the multiplier; `//` rounds toward -infinity.
scaled_clipped = scaled // (2**fpBits)
# Dividing by 2**(-shift) keeps the exponent non-negative when shift <= 0
# (NumPy rejects negative integer powers of integers); int() then truncates
# toward zero. NOTE(review): assumes every shift is <= 0 - confirm.
scaled_clipped_shifted = np.vectorize(int)(scaled_clipped / 2**(-shift))
out = np.vectorize(saturating_clip)(scaled_clipped_shifted,outBits=outBits)


In [10]:
# Multiply the fixed-point multiplier by a small known integer and show the
# product in hexadecimal.
test_num = 3*16+2
# NOTE(review): m0bin is not defined in this cell; it must already hold a
# string of binary digits from an earlier cell for int(..., base=2) to work.
m0int = int(m0bin,base=2)
scaled_int = m0int * test_num
# Only the final expression of a cell is displayed, so the former bare
# `bin(scaled_int)` line had no visible effect and has been removed.
hex(scaled_int)

'0x191900'

In [17]:
# Display the `quantized_values` attribute of `w` (defined in an earlier cell).
w.quantized_values

array([[ 12,  25, 111],
       [-29, -78,  25],
       [ 51, -67, 118]])