In [2]:
def d_to_Qn_n(x, n: int):
    """
    Convert a float to Qn.n fixed-point format.

    The result is the signed integer round(x * 2**n), saturated so that it
    fits the range [-2**(2n-1), 2**(2n-1) - 1].

    Parameters:
    x (float): The float number to convert.
    n (int): Number of fractional bits (and of integer bits, sign included).
        Must be at least 1.

    Returns:
    int: The Qn.n representation of the input float.

    Raises:
    ValueError: If n is smaller than 1 or x is NaN.

    Notes:
    Values outside [-2**(n-1), 2**(n-1) - 2**(-n)] are clamped to that range
    and a note is printed. Rounding uses the built-in round(), so exact ties
    go to the even integer.
    """
    # For n < 1 the range below degenerates (n = 0 would return -0.5 for
    # every input), so reject it instead of returning a meaningless value.
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")
    # NaN is the only value that compares unequal to itself; it passes the
    # range check below unnoticed, so report it explicitly here.
    if x != x:
        raise ValueError("cannot convert NaN to Qn.n format")

    minval = -2**(n-1)
    maxval = 2**(n-1) - 2**(-n)
    if x < minval or x > maxval:
        print('Note: Value out of range for Qn.n format. Clamping to range.')
        # Clamp the value to the range of Qn.n
        x = max(minval, min(maxval, x))

    # Scale and round to nearest integer
    qn_n_value = int(round(x * 2**n))

    # Saturate the raw integer as well: for large n, maxval itself can round
    # up in floating point and push the scaled value one step past the top.
    raw_min = -2**(2*n-1)
    raw_max = 2**(2*n-1) - 1
    return max(raw_min, min(raw_max, qn_n_value))

def Qn_n_to_d(x, n: int):
    """
    Decode a Qn.n fixed-point number into its real value.

    Parameters:
    x (int): The raw Qn.n integer to decode.
    n (int): Number of fractional bits of the format.

    Returns:
    float: x divided by 2**n.
    """
    # One unit of the raw integer is worth 2**-n, so undo the scaling.
    scale = 2 ** n
    return x / scale



In [3]:
# N is the raw integer -4884482 scaled down by 2**16; encode it at two
# different precisions to compare the results.
N = -4884482 / 2**16
n8, n16 = (d_to_Qn_n(N, bits) for bits in (8, 16))
print(f"Q8.8: {n8}, Q16.16: {n16}")

Q8.8: -19080, Q16.16: -4884482


In [4]:
import numpy as np
# Example weight and input vectors with their floating-point dot product.
W = np.array([12, 30.5, -10, -0.3])
# Named `inputs`, not `input`, so the built-in input() is not shadowed for
# the rest of the kernel session.
inputs = np.array([-1.5, 1.4, -0.3, 10.1])
dotprod = np.dot(W, inputs)

# Print every value next to its Q8.8 encoding.
for idx, w in enumerate(W):
    print(f"Weight {idx}: decimal = {w}, Q8.8 = {d_to_Qn_n(w, 8)}")
print('')
for idx, value in enumerate(inputs):
    print(f"Input {idx}: decimal = {value}, Q8.8 = {d_to_Qn_n(value, 8)}")
print('')
print(f"Dot: decimal = {dotprod}, Q8.8 = {d_to_Qn_n(dotprod, 8)}")


Weight 0: decimal = 12.0, Q8.8 = 3072
Weight 1: decimal = 30.5, Q8.8 = 7808
Weight 2: decimal = -10.0, Q8.8 = -2560
Weight 3: decimal = -0.3, Q8.8 = -77

Input 0: decimal = -1.5, Q8.8 = -384
Input 1: decimal = 1.4, Q8.8 = 358
Input 2: decimal = -0.3, Q8.8 = -77
Input 3: decimal = 10.1, Q8.8 = 2586

Dot: decimal = 24.669999999999995, Q8.8 = 6316


In [5]:
# Q8.8-encoded weight matrices of three chained linear layers
# (shapes 3x4, 3x3 and 2x3).
Q8_8_weights1 = np.array([
    [30, 780, -25, -77],
    [308, -78, -250, -779],
    [-302, 788, -250, -77],
])

Q8_8_weights2 = np.array([
    [30, 780, -25],
    [308, -78, -250],
    [-302, 788, -250],
])

Q8_8_weights3 = np.array([
    [30, 780, -25],
    [308, -78, -250],
])

# Qn_n_to_d is a plain division by 2**n, so it decodes a whole array
# element-wise in one call.
d_weights1 = Qn_n_to_d(Q8_8_weights1, 8)
d_weights2 = Qn_n_to_d(Q8_8_weights2, 8)
d_weights3 = Qn_n_to_d(Q8_8_weights3, 8)

Q8_8_in = np.array([-200, 35, 77, -256])
d_in = Qn_n_to_d(Q8_8_in, 8)

# Run the layers in floating point; only the final result is re-encoded.
d_out1 = d_weights1 @ d_in
d_out2 = d_weights2 @ d_out1
d_out = d_weights3 @ d_out2

# d_to_Qn_n branches on a scalar range check, so encode one value at a time.
Q8_8_out = np.array([d_to_Qn_n(value, 8) for value in d_out], dtype=int)

print('Q8.8 output:')
print(Q8_8_out)

Q8.8 output:
[-814  874]


In [15]:
# A single 2x2 linear layer with a bias vector, all given as Q8.8 integers.
Q8_8_weights = np.array([
    [256, -128],
    [64, 512],
])
Q8_8_bias = np.array([32, -256])

Q8_8_in = np.array([384, -64])

# Qn_n_to_d is a plain division by 2**n, so each array is decoded in one call.
d_weights = Qn_n_to_d(Q8_8_weights, 8)
d_bias = Qn_n_to_d(Q8_8_bias, 8)
d_in = Qn_n_to_d(Q8_8_in, 8)

d_out = d_weights @ d_in + d_bias

# d_to_Qn_n branches on a scalar range check, so encode one value at a time.
Q8_8_out = np.array([d_to_Qn_n(value, 8) for value in d_out], dtype=int)

print('Q8.8 output with bias:')
print(Q8_8_out)

Q8.8 output with bias:
[ 448 -288]


In [9]:
# Inspect the float-decoded input vector currently bound to d_in.
# NOTE(review): this cell's execution count (9) is lower than the previous
# cell's (15); re-run the notebook top to bottom to confirm the shown value.
d_in

array([ 1.5 , -0.25])